Customer segmentation is the process of dividing customers into groups based on common characteristics, so that companies can understand each group's choices and preferences and market to each group effectively and appropriately.
The purpose of segmenting customers is to determine how to relate to the customers in each segment in order to maximize their benefits. Done well, customer segmentation empowers marketers to interact with every customer in the most efficient way.
The dataset includes the following columns:
CustomerID, Gender, Age, Annual Income (k$), Spending Score (1-100)
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from sklearn.preprocessing import StandardScaler
import warnings
warnings.filterwarnings('ignore')
# Load the mall customer records from the CSV file (path is relative to the working directory)
data = pd.read_csv('Mall_Customers.csv')
data
| CustomerID | Gender | Age | Annual Income (k$) | Spending Score (1-100) | |
|---|---|---|---|---|---|
| 0 | 1 | Male | 19 | 15 | 39 |
| 1 | 2 | Male | 21 | 15 | 81 |
| 2 | 3 | Female | 20 | 16 | 6 |
| 3 | 4 | Female | 23 | 16 | 77 |
| 4 | 5 | Female | 31 | 17 | 40 |
| ... | ... | ... | ... | ... | ... |
| 195 | 196 | Female | 35 | 120 | 79 |
| 196 | 197 | Female | 45 | 126 | 28 |
| 197 | 198 | Male | 32 | 126 | 74 |
| 198 | 199 | Male | 32 | 137 | 18 |
| 199 | 200 | Male | 30 | 137 | 83 |
200 rows × 5 columns
data.head() #display the first five rows to get a feel for the data
| CustomerID | Gender | Age | Annual Income (k$) | Spending Score (1-100) | |
|---|---|---|---|---|---|
| 0 | 1 | Male | 19 | 15 | 39 |
| 1 | 2 | Male | 21 | 15 | 81 |
| 2 | 3 | Female | 20 | 16 | 6 |
| 3 | 4 | Female | 23 | 16 | 77 |
| 4 | 5 | Female | 31 | 17 | 40 |
data.shape #shape gives the number of rows and columns: here 200 rows and 5 columns
(200, 5)
data.info() #info is used to get the non-null count and datatype of each feature
<class 'pandas.core.frame.DataFrame'> RangeIndex: 200 entries, 0 to 199 Data columns (total 5 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 CustomerID 200 non-null int64 1 Gender 200 non-null object 2 Age 200 non-null int64 3 Annual Income (k$) 200 non-null int64 4 Spending Score (1-100) 200 non-null int64 dtypes: int64(4), object(1) memory usage: 7.9+ KB
data.describe() #describe gives summary statistics (count, mean, std, min, quartiles, max) for the numeric columns
| CustomerID | Age | Annual Income (k$) | Spending Score (1-100) | |
|---|---|---|---|---|
| count | 200.000000 | 200.000000 | 200.000000 | 200.000000 |
| mean | 100.500000 | 38.850000 | 60.560000 | 50.200000 |
| std | 57.879185 | 13.969007 | 26.264721 | 25.823522 |
| min | 1.000000 | 18.000000 | 15.000000 | 1.000000 |
| 25% | 50.750000 | 28.750000 | 41.500000 | 34.750000 |
| 50% | 100.500000 | 36.000000 | 61.500000 | 50.000000 |
| 75% | 150.250000 | 49.000000 | 78.000000 | 73.000000 |
| max | 200.000000 | 70.000000 | 137.000000 | 99.000000 |
# value_counts() on the whole frame counts identical rows; CustomerID is unique, so every count is 1
data.value_counts()
CustomerID Gender Age Annual Income (k$) Spending Score (1-100)
1 Male 19 15 39 1
138 Male 32 73 73 1
128 Male 40 71 95 1
129 Male 59 71 11 1
130 Male 38 71 75 1
..
70 Female 32 48 47 1
71 Male 70 49 55 1
72 Female 47 49 42 1
73 Female 60 50 49 1
200 Male 30 137 83 1
Length: 200, dtype: int64
# Summary of the categorical Gender column: count, number of distinct values, most frequent value and its frequency
data['Gender'].describe()
count 200 unique 2 top Female freq 112 Name: Gender, dtype: object
# Number of customers per gender
data['Gender'].value_counts()
Female 112 Male 88 Name: Gender, dtype: int64
# Scatter plot of annual income against age
x = data['Age']
y = data['Annual Income (k$)']
plt.scatter(x,y,s=50,c="black")
plt.title("Age and their annual incomes")
plt.xlabel("Age")
plt.ylabel("Annual Income") #Reading of the plot: customers around age 20 tend to have lower annual incomes and customers
#around age 30 tend to have higher annual incomes -- NOTE(review): visual impression only, confirm against the figure
Text(0, 0.5, 'Annual Income')
# Bar chart of how many customers there are at each age
plt.figure(figsize=(13, 5))
sns.countplot(data=data, x='Age')
plt.show()  # e.g. 4 customers are 18 years old, 8 customers are 19 years old, and so on
# Compare the highest annual income recorded for each gender.
# Passing all 200 rows straight to plt.bar overlays dozens of bars at each of the two
# gender positions, so only the tallest one (the per-gender maximum) is visible.
# Aggregating first draws exactly one bar per gender and makes that explicit.
# sort=False keeps the genders in order of first appearance, as the original plot did.
max_income_by_gender = data.groupby('Gender', sort=False)['Annual Income (k$)'].max()
plt.figure(figsize=(6,3))
plt.bar(max_income_by_gender.index, max_income_by_gender.values, width=0.2, color='purple')
plt.xlabel('Gender')
plt.ylabel('Maximum Annual Income (k$)')
plt.show() #The highest male annual income is 137 k$ and the highest female annual income is 126 k$
# Histogram of the spending scores
plt.figure(figsize=(8,4))
plt.hist(data['Spending Score (1-100)'],histtype='barstacked',color='darkblue')
plt.title('Number of customers spending score')
plt.xlabel('Spending Score')
plt.ylabel('Count of people')
plt.show() #Original reading: about 16-17 customers have a spending score in the lowest range -- NOTE(review): confirm the bin edges and count against the rendered plot
# Distribution of each numeric feature, drawn side by side in one figure.
plt.figure(figsize=(13,3))
features = [data['Age'],data['Annual Income (k$)'],data['Spending Score (1-100)']]
for count, x in enumerate(features, start=1):
    # One column per feature; a fixed 1x4 grid would leave the fourth slot empty.
    plt.subplot(1, len(features), count, frameon=True, facecolor='black', alpha=0.5)
    # sns.distplot is deprecated; histplot with a KDE overlay on a density scale
    # is its documented replacement.
    sns.histplot(x, bins=20, kde=True, stat='density')
# The spacing is identical for every panel, so it is set once rather than per iteration.
plt.subplots_adjust(hspace=0.7, wspace=0.5)
plt.show()
Handle Missing values
# Count the missing entries in every column
missingvalues = data.isna().sum()
missingvalues  # every count is 0, so no column has missing values
CustomerID 0 Gender 0 Age 0 Annual Income (k$) 0 Spending Score (1-100) 0 dtype: int64
Handle duplicate data
# Collect every row that is an exact copy of an earlier row
duplicaterows = data.loc[data.duplicated()]
duplicaterows  # the result is empty, so there are no duplicate rows
| CustomerID | Gender | Age | Annual Income (k$) | Spending Score (1-100) |
|---|
pd.set_option('display.max_columns', None) # Show all columns
pd.set_option('display.max_rows', None) # Show all rows
duplicateani = data.duplicated(subset=['Annual Income (k$)'])
duplicateani #True marks a row whose annual income already appeared in an earlier row; this is fine, because several customers can have the same annual income
0 False 1 True 2 False 3 True 4 False 5 True 6 False 7 True 8 False 9 True 10 True 11 True 12 False 13 True 14 True 15 True 16 False 17 True 18 False 19 True 20 False 21 True 22 False 23 True 24 False 25 True 26 True 27 True 28 False 29 True 30 False 31 True 32 False 33 True 34 True 35 True 36 False 37 True 38 False 39 True 40 False 41 True 42 False 43 True 44 True 45 True 46 False 47 True 48 True 49 True 50 False 51 True 52 False 53 True 54 True 55 True 56 False 57 True 58 False 59 True 60 True 61 True 62 False 63 True 64 False 65 True 66 True 67 True 68 True 69 True 70 False 71 True 72 False 73 True 74 False 75 True 76 True 77 True 78 True 79 True 80 True 81 True 82 True 83 True 84 True 85 True 86 False 87 True 88 False 89 True 90 False 91 True 92 False 93 True 94 True 95 True 96 True 97 True 98 False 99 True 100 False 101 True 102 True 103 True 104 True 105 True 106 False 107 True 108 True 109 True 110 True 111 True 112 False 113 True 114 False 115 True 116 True 117 True 118 False 119 True 120 True 121 True 122 False 123 True 124 False 125 True 126 False 127 True 128 True 129 True 130 True 131 True 132 False 133 True 134 False 135 True 136 True 137 True 138 False 139 True 140 False 141 True 142 False 143 True 144 False 145 True 146 True 147 True 148 False 149 True 150 True 151 True 152 True 153 True 154 True 155 True 156 True 157 True 158 True 159 True 160 False 161 True 162 False 163 True 164 False 165 True 166 False 167 True 168 False 169 True 170 True 171 True 172 True 173 True 174 False 175 True 176 True 177 True 178 False 179 True 180 False 181 True 182 False 183 True 184 False 185 True 186 False 187 True 188 False 189 True 190 True 191 True 192 False 193 True 194 False 195 True 196 False 197 True 198 False 199 True dtype: bool
Check for Outliers
# Box plots for spotting outliers in annual income and spending score.
# Author's notes on the plots: the annual-income outlier is kept, because some customers
# may simply earn more; the spending score shows no outliers.
for column, title in (('Annual Income (k$)', "Annual Income"),
                      ('Spending Score (1-100)', "Spending score")):
    plt.figure(figsize=(5, 4))
    sns.boxplot(y=data[column])
    plt.title(title)
    plt.show()
# CustomerID is not required for the analysis, so remove the column in place
data.drop(columns=["CustomerID"], inplace=True)
data
| Gender | Age | Annual Income (k$) | Spending Score (1-100) | |
|---|---|---|---|---|
| 0 | Male | 19 | 15 | 39 |
| 1 | Male | 21 | 15 | 81 |
| 2 | Female | 20 | 16 | 6 |
| 3 | Female | 23 | 16 | 77 |
| 4 | Female | 31 | 17 | 40 |
| 5 | Female | 22 | 17 | 76 |
| 6 | Female | 35 | 18 | 6 |
| 7 | Female | 23 | 18 | 94 |
| 8 | Male | 64 | 19 | 3 |
| 9 | Female | 30 | 19 | 72 |
| 10 | Male | 67 | 19 | 14 |
| 11 | Female | 35 | 19 | 99 |
| 12 | Female | 58 | 20 | 15 |
| 13 | Female | 24 | 20 | 77 |
| 14 | Male | 37 | 20 | 13 |
| 15 | Male | 22 | 20 | 79 |
| 16 | Female | 35 | 21 | 35 |
| 17 | Male | 20 | 21 | 66 |
| 18 | Male | 52 | 23 | 29 |
| 19 | Female | 35 | 23 | 98 |
| 20 | Male | 35 | 24 | 35 |
| 21 | Male | 25 | 24 | 73 |
| 22 | Female | 46 | 25 | 5 |
| 23 | Male | 31 | 25 | 73 |
| 24 | Female | 54 | 28 | 14 |
| 25 | Male | 29 | 28 | 82 |
| 26 | Female | 45 | 28 | 32 |
| 27 | Male | 35 | 28 | 61 |
| 28 | Female | 40 | 29 | 31 |
| 29 | Female | 23 | 29 | 87 |
| 30 | Male | 60 | 30 | 4 |
| 31 | Female | 21 | 30 | 73 |
| 32 | Male | 53 | 33 | 4 |
| 33 | Male | 18 | 33 | 92 |
| 34 | Female | 49 | 33 | 14 |
| 35 | Female | 21 | 33 | 81 |
| 36 | Female | 42 | 34 | 17 |
| 37 | Female | 30 | 34 | 73 |
| 38 | Female | 36 | 37 | 26 |
| 39 | Female | 20 | 37 | 75 |
| 40 | Female | 65 | 38 | 35 |
| 41 | Male | 24 | 38 | 92 |
| 42 | Male | 48 | 39 | 36 |
| 43 | Female | 31 | 39 | 61 |
| 44 | Female | 49 | 39 | 28 |
| 45 | Female | 24 | 39 | 65 |
| 46 | Female | 50 | 40 | 55 |
| 47 | Female | 27 | 40 | 47 |
| 48 | Female | 29 | 40 | 42 |
| 49 | Female | 31 | 40 | 42 |
| 50 | Female | 49 | 42 | 52 |
| 51 | Male | 33 | 42 | 60 |
| 52 | Female | 31 | 43 | 54 |
| 53 | Male | 59 | 43 | 60 |
| 54 | Female | 50 | 43 | 45 |
| 55 | Male | 47 | 43 | 41 |
| 56 | Female | 51 | 44 | 50 |
| 57 | Male | 69 | 44 | 46 |
| 58 | Female | 27 | 46 | 51 |
| 59 | Male | 53 | 46 | 46 |
| 60 | Male | 70 | 46 | 56 |
| 61 | Male | 19 | 46 | 55 |
| 62 | Female | 67 | 47 | 52 |
| 63 | Female | 54 | 47 | 59 |
| 64 | Male | 63 | 48 | 51 |
| 65 | Male | 18 | 48 | 59 |
| 66 | Female | 43 | 48 | 50 |
| 67 | Female | 68 | 48 | 48 |
| 68 | Male | 19 | 48 | 59 |
| 69 | Female | 32 | 48 | 47 |
| 70 | Male | 70 | 49 | 55 |
| 71 | Female | 47 | 49 | 42 |
| 72 | Female | 60 | 50 | 49 |
| 73 | Female | 60 | 50 | 56 |
| 74 | Male | 59 | 54 | 47 |
| 75 | Male | 26 | 54 | 54 |
| 76 | Female | 45 | 54 | 53 |
| 77 | Male | 40 | 54 | 48 |
| 78 | Female | 23 | 54 | 52 |
| 79 | Female | 49 | 54 | 42 |
| 80 | Male | 57 | 54 | 51 |
| 81 | Male | 38 | 54 | 55 |
| 82 | Male | 67 | 54 | 41 |
| 83 | Female | 46 | 54 | 44 |
| 84 | Female | 21 | 54 | 57 |
| 85 | Male | 48 | 54 | 46 |
| 86 | Female | 55 | 57 | 58 |
| 87 | Female | 22 | 57 | 55 |
| 88 | Female | 34 | 58 | 60 |
| 89 | Female | 50 | 58 | 46 |
| 90 | Female | 68 | 59 | 55 |
| 91 | Male | 18 | 59 | 41 |
| 92 | Male | 48 | 60 | 49 |
| 93 | Female | 40 | 60 | 40 |
| 94 | Female | 32 | 60 | 42 |
| 95 | Male | 24 | 60 | 52 |
| 96 | Female | 47 | 60 | 47 |
| 97 | Female | 27 | 60 | 50 |
| 98 | Male | 48 | 61 | 42 |
| 99 | Male | 20 | 61 | 49 |
| 100 | Female | 23 | 62 | 41 |
| 101 | Female | 49 | 62 | 48 |
| 102 | Male | 67 | 62 | 59 |
| 103 | Male | 26 | 62 | 55 |
| 104 | Male | 49 | 62 | 56 |
| 105 | Female | 21 | 62 | 42 |
| 106 | Female | 66 | 63 | 50 |
| 107 | Male | 54 | 63 | 46 |
| 108 | Male | 68 | 63 | 43 |
| 109 | Male | 66 | 63 | 48 |
| 110 | Male | 65 | 63 | 52 |
| 111 | Female | 19 | 63 | 54 |
| 112 | Female | 38 | 64 | 42 |
| 113 | Male | 19 | 64 | 46 |
| 114 | Female | 18 | 65 | 48 |
| 115 | Female | 19 | 65 | 50 |
| 116 | Female | 63 | 65 | 43 |
| 117 | Female | 49 | 65 | 59 |
| 118 | Female | 51 | 67 | 43 |
| 119 | Female | 50 | 67 | 57 |
| 120 | Male | 27 | 67 | 56 |
| 121 | Female | 38 | 67 | 40 |
| 122 | Female | 40 | 69 | 58 |
| 123 | Male | 39 | 69 | 91 |
| 124 | Female | 23 | 70 | 29 |
| 125 | Female | 31 | 70 | 77 |
| 126 | Male | 43 | 71 | 35 |
| 127 | Male | 40 | 71 | 95 |
| 128 | Male | 59 | 71 | 11 |
| 129 | Male | 38 | 71 | 75 |
| 130 | Male | 47 | 71 | 9 |
| 131 | Male | 39 | 71 | 75 |
| 132 | Female | 25 | 72 | 34 |
| 133 | Female | 31 | 72 | 71 |
| 134 | Male | 20 | 73 | 5 |
| 135 | Female | 29 | 73 | 88 |
| 136 | Female | 44 | 73 | 7 |
| 137 | Male | 32 | 73 | 73 |
| 138 | Male | 19 | 74 | 10 |
| 139 | Female | 35 | 74 | 72 |
| 140 | Female | 57 | 75 | 5 |
| 141 | Male | 32 | 75 | 93 |
| 142 | Female | 28 | 76 | 40 |
| 143 | Female | 32 | 76 | 87 |
| 144 | Male | 25 | 77 | 12 |
| 145 | Male | 28 | 77 | 97 |
| 146 | Male | 48 | 77 | 36 |
| 147 | Female | 32 | 77 | 74 |
| 148 | Female | 34 | 78 | 22 |
| 149 | Male | 34 | 78 | 90 |
| 150 | Male | 43 | 78 | 17 |
| 151 | Male | 39 | 78 | 88 |
| 152 | Female | 44 | 78 | 20 |
| 153 | Female | 38 | 78 | 76 |
| 154 | Female | 47 | 78 | 16 |
| 155 | Female | 27 | 78 | 89 |
| 156 | Male | 37 | 78 | 1 |
| 157 | Female | 30 | 78 | 78 |
| 158 | Male | 34 | 78 | 1 |
| 159 | Female | 30 | 78 | 73 |
| 160 | Female | 56 | 79 | 35 |
| 161 | Female | 29 | 79 | 83 |
| 162 | Male | 19 | 81 | 5 |
| 163 | Female | 31 | 81 | 93 |
| 164 | Male | 50 | 85 | 26 |
| 165 | Female | 36 | 85 | 75 |
| 166 | Male | 42 | 86 | 20 |
| 167 | Female | 33 | 86 | 95 |
| 168 | Female | 36 | 87 | 27 |
| 169 | Male | 32 | 87 | 63 |
| 170 | Male | 40 | 87 | 13 |
| 171 | Male | 28 | 87 | 75 |
| 172 | Male | 36 | 87 | 10 |
| 173 | Male | 36 | 87 | 92 |
| 174 | Female | 52 | 88 | 13 |
| 175 | Female | 30 | 88 | 86 |
| 176 | Male | 58 | 88 | 15 |
| 177 | Male | 27 | 88 | 69 |
| 178 | Male | 59 | 93 | 14 |
| 179 | Male | 35 | 93 | 90 |
| 180 | Female | 37 | 97 | 32 |
| 181 | Female | 32 | 97 | 86 |
| 182 | Male | 46 | 98 | 15 |
| 183 | Female | 29 | 98 | 88 |
| 184 | Female | 41 | 99 | 39 |
| 185 | Male | 30 | 99 | 97 |
| 186 | Female | 54 | 101 | 24 |
| 187 | Male | 28 | 101 | 68 |
| 188 | Female | 41 | 103 | 17 |
| 189 | Female | 36 | 103 | 85 |
| 190 | Female | 34 | 103 | 23 |
| 191 | Female | 32 | 103 | 69 |
| 192 | Male | 33 | 113 | 8 |
| 193 | Female | 38 | 113 | 91 |
| 194 | Female | 47 | 120 | 16 |
| 195 | Female | 35 | 120 | 79 |
| 196 | Female | 45 | 126 | 28 |
| 197 | Male | 32 | 126 | 74 |
| 198 | Male | 32 | 137 | 18 |
| 199 | Male | 30 | 137 | 83 |
K-Means clustering is an unsupervised machine learning algorithm that is used when we have unlabeled data (i.e., data without defined categories or groups). The goal of the algorithm is to find groups in the data, with the number of groups represented by the variable K.
For this algorithm to work, the number of clusters has to be chosen in advance. The K in K-Means refers to the number of clusters.
Customer segmentation according to Age and Spending Score
sns.displot(data,x='Age',element="step",hue='Age',legend=False,palette="dark") #Most customers are between 30 and 35 years of age
<seaborn.axisgrid.FacetGrid at 0x1e6cf5917d0>
# Spending score against age; both the marker colour (hue) and the marker size encode age
sns.scatterplot(x=data['Age'],y=data['Spending Score (1-100)'],hue=data['Age'],size=data['Age'])
<Axes: xlabel='Age', ylabel='Spending Score (1-100)'>
# Keep only the two features used for this segmentation
d1 = data[['Age','Spending Score (1-100)']]
d1
| Age | Spending Score (1-100) | |
|---|---|---|
| 0 | 19 | 39 |
| 1 | 21 | 81 |
| 2 | 20 | 6 |
| 3 | 23 | 77 |
| 4 | 31 | 40 |
| 5 | 22 | 76 |
| 6 | 35 | 6 |
| 7 | 23 | 94 |
| 8 | 64 | 3 |
| 9 | 30 | 72 |
| 10 | 67 | 14 |
| 11 | 35 | 99 |
| 12 | 58 | 15 |
| 13 | 24 | 77 |
| 14 | 37 | 13 |
| 15 | 22 | 79 |
| 16 | 35 | 35 |
| 17 | 20 | 66 |
| 18 | 52 | 29 |
| 19 | 35 | 98 |
| 20 | 35 | 35 |
| 21 | 25 | 73 |
| 22 | 46 | 5 |
| 23 | 31 | 73 |
| 24 | 54 | 14 |
| 25 | 29 | 82 |
| 26 | 45 | 32 |
| 27 | 35 | 61 |
| 28 | 40 | 31 |
| 29 | 23 | 87 |
| 30 | 60 | 4 |
| 31 | 21 | 73 |
| 32 | 53 | 4 |
| 33 | 18 | 92 |
| 34 | 49 | 14 |
| 35 | 21 | 81 |
| 36 | 42 | 17 |
| 37 | 30 | 73 |
| 38 | 36 | 26 |
| 39 | 20 | 75 |
| 40 | 65 | 35 |
| 41 | 24 | 92 |
| 42 | 48 | 36 |
| 43 | 31 | 61 |
| 44 | 49 | 28 |
| 45 | 24 | 65 |
| 46 | 50 | 55 |
| 47 | 27 | 47 |
| 48 | 29 | 42 |
| 49 | 31 | 42 |
| 50 | 49 | 52 |
| 51 | 33 | 60 |
| 52 | 31 | 54 |
| 53 | 59 | 60 |
| 54 | 50 | 45 |
| 55 | 47 | 41 |
| 56 | 51 | 50 |
| 57 | 69 | 46 |
| 58 | 27 | 51 |
| 59 | 53 | 46 |
| 60 | 70 | 56 |
| 61 | 19 | 55 |
| 62 | 67 | 52 |
| 63 | 54 | 59 |
| 64 | 63 | 51 |
| 65 | 18 | 59 |
| 66 | 43 | 50 |
| 67 | 68 | 48 |
| 68 | 19 | 59 |
| 69 | 32 | 47 |
| 70 | 70 | 55 |
| 71 | 47 | 42 |
| 72 | 60 | 49 |
| 73 | 60 | 56 |
| 74 | 59 | 47 |
| 75 | 26 | 54 |
| 76 | 45 | 53 |
| 77 | 40 | 48 |
| 78 | 23 | 52 |
| 79 | 49 | 42 |
| 80 | 57 | 51 |
| 81 | 38 | 55 |
| 82 | 67 | 41 |
| 83 | 46 | 44 |
| 84 | 21 | 57 |
| 85 | 48 | 46 |
| 86 | 55 | 58 |
| 87 | 22 | 55 |
| 88 | 34 | 60 |
| 89 | 50 | 46 |
| 90 | 68 | 55 |
| 91 | 18 | 41 |
| 92 | 48 | 49 |
| 93 | 40 | 40 |
| 94 | 32 | 42 |
| 95 | 24 | 52 |
| 96 | 47 | 47 |
| 97 | 27 | 50 |
| 98 | 48 | 42 |
| 99 | 20 | 49 |
| 100 | 23 | 41 |
| 101 | 49 | 48 |
| 102 | 67 | 59 |
| 103 | 26 | 55 |
| 104 | 49 | 56 |
| 105 | 21 | 42 |
| 106 | 66 | 50 |
| 107 | 54 | 46 |
| 108 | 68 | 43 |
| 109 | 66 | 48 |
| 110 | 65 | 52 |
| 111 | 19 | 54 |
| 112 | 38 | 42 |
| 113 | 19 | 46 |
| 114 | 18 | 48 |
| 115 | 19 | 50 |
| 116 | 63 | 43 |
| 117 | 49 | 59 |
| 118 | 51 | 43 |
| 119 | 50 | 57 |
| 120 | 27 | 56 |
| 121 | 38 | 40 |
| 122 | 40 | 58 |
| 123 | 39 | 91 |
| 124 | 23 | 29 |
| 125 | 31 | 77 |
| 126 | 43 | 35 |
| 127 | 40 | 95 |
| 128 | 59 | 11 |
| 129 | 38 | 75 |
| 130 | 47 | 9 |
| 131 | 39 | 75 |
| 132 | 25 | 34 |
| 133 | 31 | 71 |
| 134 | 20 | 5 |
| 135 | 29 | 88 |
| 136 | 44 | 7 |
| 137 | 32 | 73 |
| 138 | 19 | 10 |
| 139 | 35 | 72 |
| 140 | 57 | 5 |
| 141 | 32 | 93 |
| 142 | 28 | 40 |
| 143 | 32 | 87 |
| 144 | 25 | 12 |
| 145 | 28 | 97 |
| 146 | 48 | 36 |
| 147 | 32 | 74 |
| 148 | 34 | 22 |
| 149 | 34 | 90 |
| 150 | 43 | 17 |
| 151 | 39 | 88 |
| 152 | 44 | 20 |
| 153 | 38 | 76 |
| 154 | 47 | 16 |
| 155 | 27 | 89 |
| 156 | 37 | 1 |
| 157 | 30 | 78 |
| 158 | 34 | 1 |
| 159 | 30 | 73 |
| 160 | 56 | 35 |
| 161 | 29 | 83 |
| 162 | 19 | 5 |
| 163 | 31 | 93 |
| 164 | 50 | 26 |
| 165 | 36 | 75 |
| 166 | 42 | 20 |
| 167 | 33 | 95 |
| 168 | 36 | 27 |
| 169 | 32 | 63 |
| 170 | 40 | 13 |
| 171 | 28 | 75 |
| 172 | 36 | 10 |
| 173 | 36 | 92 |
| 174 | 52 | 13 |
| 175 | 30 | 86 |
| 176 | 58 | 15 |
| 177 | 27 | 69 |
| 178 | 59 | 14 |
| 179 | 35 | 90 |
| 180 | 37 | 32 |
| 181 | 32 | 86 |
| 182 | 46 | 15 |
| 183 | 29 | 88 |
| 184 | 41 | 39 |
| 185 | 30 | 97 |
| 186 | 54 | 24 |
| 187 | 28 | 68 |
| 188 | 41 | 17 |
| 189 | 36 | 85 |
| 190 | 34 | 23 |
| 191 | 32 | 69 |
| 192 | 33 | 8 |
| 193 | 38 | 91 |
| 194 | 47 | 16 |
| 195 | 35 | 79 |
| 196 | 45 | 28 |
| 197 | 32 | 74 |
| 198 | 32 | 18 |
| 199 | 30 | 83 |
sc = StandardScaler()
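Scaling all the values: standardize each feature to zero mean and unit variance so that neither feature dominates the distance computation used by K-Means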
# Fit the scaler on d1 and return the standardized values as a NumPy array
Data1 = sc.fit_transform(d1)
Data1
array([[-1.42456879, -0.43480148],
[-1.28103541, 1.19570407],
[-1.3528021 , -1.71591298],
[-1.13750203, 1.04041783],
[-0.56336851, -0.39597992],
[-1.20926872, 1.00159627],
[-0.27630176, -1.71591298],
[-1.13750203, 1.70038436],
[ 1.80493225, -1.83237767],
[-0.6351352 , 0.84631002],
[ 2.02023231, -1.4053405 ],
[-0.27630176, 1.89449216],
[ 1.37433211, -1.36651894],
[-1.06573534, 1.04041783],
[-0.13276838, -1.44416206],
[-1.20926872, 1.11806095],
[-0.27630176, -0.59008772],
[-1.3528021 , 0.61338066],
[ 0.94373197, -0.82301709],
[-0.27630176, 1.8556706 ],
[-0.27630176, -0.59008772],
[-0.99396865, 0.88513158],
[ 0.51313183, -1.75473454],
[-0.56336851, 0.88513158],
[ 1.08726535, -1.4053405 ],
[-0.70690189, 1.23452563],
[ 0.44136514, -0.7065524 ],
[-0.27630176, 0.41927286],
[ 0.08253169, -0.74537397],
[-1.13750203, 1.42863343],
[ 1.51786549, -1.7935561 ],
[-1.28103541, 0.88513158],
[ 1.01549866, -1.7935561 ],
[-1.49633548, 1.62274124],
[ 0.7284319 , -1.4053405 ],
[-1.28103541, 1.19570407],
[ 0.22606507, -1.28887582],
[-0.6351352 , 0.88513158],
[-0.20453507, -0.93948177],
[-1.3528021 , 0.96277471],
[ 1.87669894, -0.59008772],
[-1.06573534, 1.62274124],
[ 0.65666521, -0.55126616],
[-0.56336851, 0.41927286],
[ 0.7284319 , -0.86183865],
[-1.06573534, 0.5745591 ],
[ 0.80019859, 0.18634349],
[-0.85043527, -0.12422899],
[-0.70690189, -0.3183368 ],
[-0.56336851, -0.3183368 ],
[ 0.7284319 , 0.06987881],
[-0.41983513, 0.38045129],
[-0.56336851, 0.14752193],
[ 1.4460988 , 0.38045129],
[ 0.80019859, -0.20187212],
[ 0.58489852, -0.35715836],
[ 0.87196528, -0.00776431],
[ 2.16376569, -0.16305055],
[-0.85043527, 0.03105725],
[ 1.01549866, -0.16305055],
[ 2.23553238, 0.22516505],
[-1.42456879, 0.18634349],
[ 2.02023231, 0.06987881],
[ 1.08726535, 0.34162973],
[ 1.73316556, 0.03105725],
[-1.49633548, 0.34162973],
[ 0.29783176, -0.00776431],
[ 2.091999 , -0.08540743],
[-1.42456879, 0.34162973],
[-0.49160182, -0.12422899],
[ 2.23553238, 0.18634349],
[ 0.58489852, -0.3183368 ],
[ 1.51786549, -0.04658587],
[ 1.51786549, 0.22516505],
[ 1.4460988 , -0.12422899],
[-0.92220196, 0.14752193],
[ 0.44136514, 0.10870037],
[ 0.08253169, -0.08540743],
[-1.13750203, 0.06987881],
[ 0.7284319 , -0.3183368 ],
[ 1.30256542, 0.03105725],
[-0.06100169, 0.18634349],
[ 2.02023231, -0.35715836],
[ 0.51313183, -0.24069368],
[-1.28103541, 0.26398661],
[ 0.65666521, -0.16305055],
[ 1.15903204, 0.30280817],
[-1.20926872, 0.18634349],
[-0.34806844, 0.38045129],
[ 0.80019859, -0.16305055],
[ 2.091999 , 0.18634349],
[-1.49633548, -0.35715836],
[ 0.65666521, -0.04658587],
[ 0.08253169, -0.39597992],
[-0.49160182, -0.3183368 ],
[-1.06573534, 0.06987881],
[ 0.58489852, -0.12422899],
[-0.85043527, -0.00776431],
[ 0.65666521, -0.3183368 ],
[-1.3528021 , -0.04658587],
[-1.13750203, -0.35715836],
[ 0.7284319 , -0.08540743],
[ 2.02023231, 0.34162973],
[-0.92220196, 0.18634349],
[ 0.7284319 , 0.22516505],
[-1.28103541, -0.3183368 ],
[ 1.94846562, -0.00776431],
[ 1.08726535, -0.16305055],
[ 2.091999 , -0.27951524],
[ 1.94846562, -0.08540743],
[ 1.87669894, 0.06987881],
[-1.42456879, 0.14752193],
[-0.06100169, -0.3183368 ],
[-1.42456879, -0.16305055],
[-1.49633548, -0.08540743],
[-1.42456879, -0.00776431],
[ 1.73316556, -0.27951524],
[ 0.7284319 , 0.34162973],
[ 0.87196528, -0.27951524],
[ 0.80019859, 0.26398661],
[-0.85043527, 0.22516505],
[-0.06100169, -0.39597992],
[ 0.08253169, 0.30280817],
[ 0.010765 , 1.58391968],
[-1.13750203, -0.82301709],
[-0.56336851, 1.04041783],
[ 0.29783176, -0.59008772],
[ 0.08253169, 1.73920592],
[ 1.4460988 , -1.52180518],
[-0.06100169, 0.96277471],
[ 0.58489852, -1.5994483 ],
[ 0.010765 , 0.96277471],
[-0.99396865, -0.62890928],
[-0.56336851, 0.80748846],
[-1.3528021 , -1.75473454],
[-0.70690189, 1.46745499],
[ 0.36959845, -1.67709142],
[-0.49160182, 0.88513158],
[-1.42456879, -1.56062674],
[-0.27630176, 0.84631002],
[ 1.30256542, -1.75473454],
[-0.49160182, 1.6615628 ],
[-0.77866858, -0.39597992],
[-0.49160182, 1.42863343],
[-0.99396865, -1.48298362],
[-0.77866858, 1.81684904],
[ 0.65666521, -0.55126616],
[-0.49160182, 0.92395314],
[-0.34806844, -1.09476801],
[-0.34806844, 1.54509812],
[ 0.29783176, -1.28887582],
[ 0.010765 , 1.46745499],
[ 0.36959845, -1.17241113],
[-0.06100169, 1.00159627],
[ 0.58489852, -1.32769738],
[-0.85043527, 1.50627656],
[-0.13276838, -1.91002079],
[-0.6351352 , 1.07923939],
[-0.34806844, -1.91002079],
[-0.6351352 , 0.88513158],
[ 1.23079873, -0.59008772],
[-0.70690189, 1.27334719],
[-1.42456879, -1.75473454],
[-0.56336851, 1.6615628 ],
[ 0.80019859, -0.93948177],
[-0.20453507, 0.96277471],
[ 0.22606507, -1.17241113],
[-0.41983513, 1.73920592],
[-0.20453507, -0.90066021],
[-0.49160182, 0.49691598],
[ 0.08253169, -1.44416206],
[-0.77866858, 0.96277471],
[-0.20453507, -1.56062674],
[-0.20453507, 1.62274124],
[ 0.94373197, -1.44416206],
[-0.6351352 , 1.38981187],
[ 1.37433211, -1.36651894],
[-0.85043527, 0.72984534],
[ 1.4460988 , -1.4053405 ],
[-0.27630176, 1.54509812],
[-0.13276838, -0.7065524 ],
[-0.49160182, 1.38981187],
[ 0.51313183, -1.36651894],
[-0.70690189, 1.46745499],
[ 0.15429838, -0.43480148],
[-0.6351352 , 1.81684904],
[ 1.08726535, -1.01712489],
[-0.77866858, 0.69102378],
[ 0.15429838, -1.28887582],
[-0.20453507, 1.35099031],
[-0.34806844, -1.05594645],
[-0.49160182, 0.72984534],
[-0.41983513, -1.63826986],
[-0.06100169, 1.58391968],
[ 0.58489852, -1.32769738],
[-0.27630176, 1.11806095],
[ 0.44136514, -0.86183865],
[-0.49160182, 0.92395314],
[-0.49160182, -1.25005425],
[-0.6351352 , 1.27334719]])
Finding the number of clusters using Elbow method
The elbow method is used to find the optimal K value graphically. It works by computing the within-cluster sum of squares (WCSS), i.e., the sum of the squared distances between the points in each cluster and that cluster's centroid, for a range of K values.
The elbow graph shows the WCSS values on the y-axis against the corresponding K values on the x-axis. We pick the K value at which the curve bends into an elbow shape, i.e., where increasing K further yields only a small reduction in WCSS.
from sklearn.cluster import KMeans
# Within-cluster sum of squares (KMeans.inertia_) for K = 1..9, used to draw the elbow curve.
wcss = []
for i in range(1,10):
    # A fixed random_state and an explicit n_init make the curve reproducible between
    # runs and across scikit-learn versions, whose default n_init differs.
    km = KMeans(n_clusters=i, n_init=10, random_state=42)
    km.fit(Data1)  # only the fitted inertia is needed here, not the labels
    wcss.append(km.inertia_)
import plotly.express as px
import plotly.graph_objects as go
b = range(1,10)
# Plot WCSS against K. With bare array inputs plotly would title the axes just "x" and "y",
# so map those names to readable labels.
px.line(x=b, y=wcss, labels={'x': 'Number of clusters (K)', 'y': 'WCSS'}, title='Elbow method')
From the above graph, it can be seen that the elbow occurs at K = 4, so the number of clusters is chosen to be 4.
Building the model
# Fit the final model with K=4 and get one cluster label per customer.
# A fixed random_state (with an explicit n_init) keeps the cluster numbering stable from
# run to run; without it the same groups can come back under different label numbers.
km = KMeans(n_clusters=4, n_init=10, random_state=42)
prediction1 = km.fit_predict(Data1)
prediction1
array([2, 0, 2, 0, 2, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 2, 0, 1, 0, 2, 0,
1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 2, 0, 3, 0, 3, 0,
1, 0, 3, 2, 2, 2, 3, 0, 2, 3, 3, 3, 3, 3, 2, 3, 3, 2, 3, 3, 3, 2,
3, 3, 2, 2, 3, 3, 3, 3, 3, 2, 3, 2, 2, 3, 3, 2, 3, 3, 2, 3, 3, 2,
0, 3, 3, 2, 3, 2, 2, 2, 3, 2, 3, 2, 2, 3, 3, 2, 3, 2, 3, 3, 3, 3,
3, 2, 2, 2, 2, 2, 3, 3, 3, 3, 2, 2, 0, 0, 2, 0, 1, 0, 1, 0, 1, 0,
2, 0, 2, 0, 1, 0, 2, 0, 1, 0, 2, 0, 2, 0, 3, 0, 2, 0, 1, 0, 1, 0,
1, 0, 1, 0, 1, 0, 3, 0, 2, 0, 1, 0, 1, 0, 2, 0, 1, 0, 1, 0, 1, 0,
1, 0, 1, 0, 2, 0, 1, 0, 2, 0, 1, 0, 1, 0, 2, 0, 1, 0, 1, 0, 1, 0,
2, 0])
# Attach each customer's Age/Spending Score cluster label to the frame
data['age_cluster'] = prediction1
data
| Gender | Age | Annual Income (k$) | Spending Score (1-100) | age_cluster | |
|---|---|---|---|---|---|
| 0 | Male | 19 | 15 | 39 | 2 |
| 1 | Male | 21 | 15 | 81 | 0 |
| 2 | Female | 20 | 16 | 6 | 2 |
| 3 | Female | 23 | 16 | 77 | 0 |
| 4 | Female | 31 | 17 | 40 | 2 |
| 5 | Female | 22 | 17 | 76 | 0 |
| 6 | Female | 35 | 18 | 6 | 1 |
| 7 | Female | 23 | 18 | 94 | 0 |
| 8 | Male | 64 | 19 | 3 | 1 |
| 9 | Female | 30 | 19 | 72 | 0 |
| 10 | Male | 67 | 19 | 14 | 1 |
| 11 | Female | 35 | 19 | 99 | 0 |
| 12 | Female | 58 | 20 | 15 | 1 |
| 13 | Female | 24 | 20 | 77 | 0 |
| 14 | Male | 37 | 20 | 13 | 1 |
| 15 | Male | 22 | 20 | 79 | 0 |
| 16 | Female | 35 | 21 | 35 | 2 |
| 17 | Male | 20 | 21 | 66 | 0 |
| 18 | Male | 52 | 23 | 29 | 1 |
| 19 | Female | 35 | 23 | 98 | 0 |
| 20 | Male | 35 | 24 | 35 | 2 |
| 21 | Male | 25 | 24 | 73 | 0 |
| 22 | Female | 46 | 25 | 5 | 1 |
| 23 | Male | 31 | 25 | 73 | 0 |
| 24 | Female | 54 | 28 | 14 | 1 |
| 25 | Male | 29 | 28 | 82 | 0 |
| 26 | Female | 45 | 28 | 32 | 1 |
| 27 | Male | 35 | 28 | 61 | 0 |
| 28 | Female | 40 | 29 | 31 | 1 |
| 29 | Female | 23 | 29 | 87 | 0 |
| 30 | Male | 60 | 30 | 4 | 1 |
| 31 | Female | 21 | 30 | 73 | 0 |
| 32 | Male | 53 | 33 | 4 | 1 |
| 33 | Male | 18 | 33 | 92 | 0 |
| 34 | Female | 49 | 33 | 14 | 1 |
| 35 | Female | 21 | 33 | 81 | 0 |
| 36 | Female | 42 | 34 | 17 | 1 |
| 37 | Female | 30 | 34 | 73 | 0 |
| 38 | Female | 36 | 37 | 26 | 2 |
| 39 | Female | 20 | 37 | 75 | 0 |
| 40 | Female | 65 | 38 | 35 | 3 |
| 41 | Male | 24 | 38 | 92 | 0 |
| 42 | Male | 48 | 39 | 36 | 3 |
| 43 | Female | 31 | 39 | 61 | 0 |
| 44 | Female | 49 | 39 | 28 | 1 |
| 45 | Female | 24 | 39 | 65 | 0 |
| 46 | Female | 50 | 40 | 55 | 3 |
| 47 | Female | 27 | 40 | 47 | 2 |
| 48 | Female | 29 | 40 | 42 | 2 |
| 49 | Female | 31 | 40 | 42 | 2 |
| 50 | Female | 49 | 42 | 52 | 3 |
| 51 | Male | 33 | 42 | 60 | 0 |
| 52 | Female | 31 | 43 | 54 | 2 |
| 53 | Male | 59 | 43 | 60 | 3 |
| 54 | Female | 50 | 43 | 45 | 3 |
| 55 | Male | 47 | 43 | 41 | 3 |
| 56 | Female | 51 | 44 | 50 | 3 |
| 57 | Male | 69 | 44 | 46 | 3 |
| 58 | Female | 27 | 46 | 51 | 2 |
| 59 | Male | 53 | 46 | 46 | 3 |
| 60 | Male | 70 | 46 | 56 | 3 |
| 61 | Male | 19 | 46 | 55 | 2 |
| 62 | Female | 67 | 47 | 52 | 3 |
| 63 | Female | 54 | 47 | 59 | 3 |
| 64 | Male | 63 | 48 | 51 | 3 |
| 65 | Male | 18 | 48 | 59 | 2 |
| 66 | Female | 43 | 48 | 50 | 3 |
| 67 | Female | 68 | 48 | 48 | 3 |
| 68 | Male | 19 | 48 | 59 | 2 |
| 69 | Female | 32 | 48 | 47 | 2 |
| 70 | Male | 70 | 49 | 55 | 3 |
| 71 | Female | 47 | 49 | 42 | 3 |
| 72 | Female | 60 | 50 | 49 | 3 |
| 73 | Female | 60 | 50 | 56 | 3 |
| 74 | Male | 59 | 54 | 47 | 3 |
| 75 | Male | 26 | 54 | 54 | 2 |
| 76 | Female | 45 | 54 | 53 | 3 |
| 77 | Male | 40 | 54 | 48 | 2 |
| 78 | Female | 23 | 54 | 52 | 2 |
| 79 | Female | 49 | 54 | 42 | 3 |
| 80 | Male | 57 | 54 | 51 | 3 |
| 81 | Male | 38 | 54 | 55 | 2 |
| 82 | Male | 67 | 54 | 41 | 3 |
| 83 | Female | 46 | 54 | 44 | 3 |
| 84 | Female | 21 | 54 | 57 | 2 |
| 85 | Male | 48 | 54 | 46 | 3 |
| 86 | Female | 55 | 57 | 58 | 3 |
| 87 | Female | 22 | 57 | 55 | 2 |
| 88 | Female | 34 | 58 | 60 | 0 |
| 89 | Female | 50 | 58 | 46 | 3 |
| 90 | Female | 68 | 59 | 55 | 3 |
| 91 | Male | 18 | 59 | 41 | 2 |
| 92 | Male | 48 | 60 | 49 | 3 |
| 93 | Female | 40 | 60 | 40 | 2 |
| 94 | Female | 32 | 60 | 42 | 2 |
| 95 | Male | 24 | 60 | 52 | 2 |
| 96 | Female | 47 | 60 | 47 | 3 |
| 97 | Female | 27 | 60 | 50 | 2 |
| 98 | Male | 48 | 61 | 42 | 3 |
| 99 | Male | 20 | 61 | 49 | 2 |
| 100 | Female | 23 | 62 | 41 | 2 |
| 101 | Female | 49 | 62 | 48 | 3 |
| 102 | Male | 67 | 62 | 59 | 3 |
| 103 | Male | 26 | 62 | 55 | 2 |
| 104 | Male | 49 | 62 | 56 | 3 |
| 105 | Female | 21 | 62 | 42 | 2 |
| 106 | Female | 66 | 63 | 50 | 3 |
| 107 | Male | 54 | 63 | 46 | 3 |
| 108 | Male | 68 | 63 | 43 | 3 |
| 109 | Male | 66 | 63 | 48 | 3 |
| 110 | Male | 65 | 63 | 52 | 3 |
| 111 | Female | 19 | 63 | 54 | 2 |
| 112 | Female | 38 | 64 | 42 | 2 |
| 113 | Male | 19 | 64 | 46 | 2 |
| 114 | Female | 18 | 65 | 48 | 2 |
| 115 | Female | 19 | 65 | 50 | 2 |
| 116 | Female | 63 | 65 | 43 | 3 |
| 117 | Female | 49 | 65 | 59 | 3 |
| 118 | Female | 51 | 67 | 43 | 3 |
| 119 | Female | 50 | 67 | 57 | 3 |
| 120 | Male | 27 | 67 | 56 | 2 |
| 121 | Female | 38 | 67 | 40 | 2 |
| 122 | Female | 40 | 69 | 58 | 0 |
| 123 | Male | 39 | 69 | 91 | 0 |
| 124 | Female | 23 | 70 | 29 | 2 |
| 125 | Female | 31 | 70 | 77 | 0 |
| 126 | Male | 43 | 71 | 35 | 1 |
| 127 | Male | 40 | 71 | 95 | 0 |
| 128 | Male | 59 | 71 | 11 | 1 |
| 129 | Male | 38 | 71 | 75 | 0 |
| 130 | Male | 47 | 71 | 9 | 1 |
| 131 | Male | 39 | 71 | 75 | 0 |
| 132 | Female | 25 | 72 | 34 | 2 |
| 133 | Female | 31 | 72 | 71 | 0 |
| 134 | Male | 20 | 73 | 5 | 2 |
| 135 | Female | 29 | 73 | 88 | 0 |
| 136 | Female | 44 | 73 | 7 | 1 |
| 137 | Male | 32 | 73 | 73 | 0 |
| 138 | Male | 19 | 74 | 10 | 2 |
| 139 | Female | 35 | 74 | 72 | 0 |
| 140 | Female | 57 | 75 | 5 | 1 |
| 141 | Male | 32 | 75 | 93 | 0 |
| 142 | Female | 28 | 76 | 40 | 2 |
| 143 | Female | 32 | 76 | 87 | 0 |
| 144 | Male | 25 | 77 | 12 | 2 |
| 145 | Male | 28 | 77 | 97 | 0 |
| 146 | Male | 48 | 77 | 36 | 3 |
| 147 | Female | 32 | 77 | 74 | 0 |
| 148 | Female | 34 | 78 | 22 | 2 |
| 149 | Male | 34 | 78 | 90 | 0 |
| 150 | Male | 43 | 78 | 17 | 1 |
| 151 | Male | 39 | 78 | 88 | 0 |
| 152 | Female | 44 | 78 | 20 | 1 |
| 153 | Female | 38 | 78 | 76 | 0 |
| 154 | Female | 47 | 78 | 16 | 1 |
| 155 | Female | 27 | 78 | 89 | 0 |
| 156 | Male | 37 | 78 | 1 | 1 |
| 157 | Female | 30 | 78 | 78 | 0 |
| 158 | Male | 34 | 78 | 1 | 1 |
| 159 | Female | 30 | 78 | 73 | 0 |
| 160 | Female | 56 | 79 | 35 | 3 |
| 161 | Female | 29 | 79 | 83 | 0 |
| 162 | Male | 19 | 81 | 5 | 2 |
| 163 | Female | 31 | 81 | 93 | 0 |
| 164 | Male | 50 | 85 | 26 | 1 |
| 165 | Female | 36 | 85 | 75 | 0 |
| 166 | Male | 42 | 86 | 20 | 1 |
| 167 | Female | 33 | 86 | 95 | 0 |
| 168 | Female | 36 | 87 | 27 | 2 |
| 169 | Male | 32 | 87 | 63 | 0 |
| 170 | Male | 40 | 87 | 13 | 1 |
| 171 | Male | 28 | 87 | 75 | 0 |
| 172 | Male | 36 | 87 | 10 | 1 |
| 173 | Male | 36 | 87 | 92 | 0 |
| 174 | Female | 52 | 88 | 13 | 1 |
| 175 | Female | 30 | 88 | 86 | 0 |
| 176 | Male | 58 | 88 | 15 | 1 |
| 177 | Male | 27 | 88 | 69 | 0 |
| 178 | Male | 59 | 93 | 14 | 1 |
| 179 | Male | 35 | 93 | 90 | 0 |
| 180 | Female | 37 | 97 | 32 | 2 |
| 181 | Female | 32 | 97 | 86 | 0 |
| 182 | Male | 46 | 98 | 15 | 1 |
| 183 | Female | 29 | 98 | 88 | 0 |
| 184 | Female | 41 | 99 | 39 | 2 |
| 185 | Male | 30 | 99 | 97 | 0 |
| 186 | Female | 54 | 101 | 24 | 1 |
| 187 | Male | 28 | 101 | 68 | 0 |
| 188 | Female | 41 | 103 | 17 | 1 |
| 189 | Female | 36 | 103 | 85 | 0 |
| 190 | Female | 34 | 103 | 23 | 2 |
| 191 | Female | 32 | 103 | 69 | 0 |
| 192 | Male | 33 | 113 | 8 | 1 |
| 193 | Female | 38 | 113 | 91 | 0 |
| 194 | Female | 47 | 120 | 16 | 1 |
| 195 | Female | 35 | 120 | 79 | 0 |
| 196 | Female | 45 | 126 | 28 | 1 |
| 197 | Male | 32 | 126 | 74 | 0 |
| 198 | Male | 32 | 137 | 18 | 2 |
| 199 | Male | 30 | 137 | 83 | 0 |
# Split the customers by their age-based cluster label so each group can be
# drawn as its own trace. The names da1..da4 are kept unchanged.
da1 = data[data['age_cluster']==0]
da2 = data[data['age_cluster']==1]
da3 = data[data['age_cluster']==2]
da4 = data[data['age_cluster']==3]

# One scatter trace per cluster. Naming each trace gives the legend readable
# entries instead of plotly's default "trace 0", "trace 1", ...
fig = go.Figure()
for cluster_id, subset in enumerate((da1, da2, da3, da4)):
    fig.add_trace(go.Scatter(
        x=subset['Age'],
        y=subset['Spending Score (1-100)'],
        mode="markers",
        name=f"Cluster {cluster_id}",
    ))
# update_layout returns the figure, so as the last expression of the cell it
# is still what the notebook renders.
fig.update_layout(
    title="Age vs Spending Score by age_cluster",
    xaxis_title="Age",
    yaxis_title="Spending Score (1-100)",
)
Customer Segmentation according to Annual Income (k$) and Spending Score (1-100)
# Summary statistics (count, mean, std, min, quartiles, max) of annual income, in k$.
data['Annual Income (k$)'].describe()
count 200.000000 mean 60.560000 std 26.264721 min 15.000000 25% 41.500000 50% 61.500000 75% 78.000000 max 137.000000 Name: Annual Income (k$), dtype: float64
# Distribution of annual income. The 70-79 k$ band is the most common one
# (the modal band, not a majority of the customers).
px.histogram(data_frame=data, x='Annual Income (k$)')
# Income vs spending score; marker colour and marker size both encode
# annual income.
sns.scatterplot(
    data=data,
    x='Annual Income (k$)',
    y='Spending Score (1-100)',
    hue='Annual Income (k$)',
    size='Annual Income (k$)',
)
<Axes: xlabel='Annual Income (k$)', ylabel='Spending Score (1-100)'>
# Keep only the two features used for the income/spending segmentation.
d2 = data.loc[:, ['Annual Income (k$)', 'Spending Score (1-100)']]
d2
| Annual Income (k$) | Spending Score (1-100) | |
|---|---|---|
| 0 | 15 | 39 |
| 1 | 15 | 81 |
| 2 | 16 | 6 |
| 3 | 16 | 77 |
| 4 | 17 | 40 |
| 5 | 17 | 76 |
| 6 | 18 | 6 |
| 7 | 18 | 94 |
| 8 | 19 | 3 |
| 9 | 19 | 72 |
| 10 | 19 | 14 |
| 11 | 19 | 99 |
| 12 | 20 | 15 |
| 13 | 20 | 77 |
| 14 | 20 | 13 |
| 15 | 20 | 79 |
| 16 | 21 | 35 |
| 17 | 21 | 66 |
| 18 | 23 | 29 |
| 19 | 23 | 98 |
| 20 | 24 | 35 |
| 21 | 24 | 73 |
| 22 | 25 | 5 |
| 23 | 25 | 73 |
| 24 | 28 | 14 |
| 25 | 28 | 82 |
| 26 | 28 | 32 |
| 27 | 28 | 61 |
| 28 | 29 | 31 |
| 29 | 29 | 87 |
| 30 | 30 | 4 |
| 31 | 30 | 73 |
| 32 | 33 | 4 |
| 33 | 33 | 92 |
| 34 | 33 | 14 |
| 35 | 33 | 81 |
| 36 | 34 | 17 |
| 37 | 34 | 73 |
| 38 | 37 | 26 |
| 39 | 37 | 75 |
| 40 | 38 | 35 |
| 41 | 38 | 92 |
| 42 | 39 | 36 |
| 43 | 39 | 61 |
| 44 | 39 | 28 |
| 45 | 39 | 65 |
| 46 | 40 | 55 |
| 47 | 40 | 47 |
| 48 | 40 | 42 |
| 49 | 40 | 42 |
| 50 | 42 | 52 |
| 51 | 42 | 60 |
| 52 | 43 | 54 |
| 53 | 43 | 60 |
| 54 | 43 | 45 |
| 55 | 43 | 41 |
| 56 | 44 | 50 |
| 57 | 44 | 46 |
| 58 | 46 | 51 |
| 59 | 46 | 46 |
| 60 | 46 | 56 |
| 61 | 46 | 55 |
| 62 | 47 | 52 |
| 63 | 47 | 59 |
| 64 | 48 | 51 |
| 65 | 48 | 59 |
| 66 | 48 | 50 |
| 67 | 48 | 48 |
| 68 | 48 | 59 |
| 69 | 48 | 47 |
| 70 | 49 | 55 |
| 71 | 49 | 42 |
| 72 | 50 | 49 |
| 73 | 50 | 56 |
| 74 | 54 | 47 |
| 75 | 54 | 54 |
| 76 | 54 | 53 |
| 77 | 54 | 48 |
| 78 | 54 | 52 |
| 79 | 54 | 42 |
| 80 | 54 | 51 |
| 81 | 54 | 55 |
| 82 | 54 | 41 |
| 83 | 54 | 44 |
| 84 | 54 | 57 |
| 85 | 54 | 46 |
| 86 | 57 | 58 |
| 87 | 57 | 55 |
| 88 | 58 | 60 |
| 89 | 58 | 46 |
| 90 | 59 | 55 |
| 91 | 59 | 41 |
| 92 | 60 | 49 |
| 93 | 60 | 40 |
| 94 | 60 | 42 |
| 95 | 60 | 52 |
| 96 | 60 | 47 |
| 97 | 60 | 50 |
| 98 | 61 | 42 |
| 99 | 61 | 49 |
| 100 | 62 | 41 |
| 101 | 62 | 48 |
| 102 | 62 | 59 |
| 103 | 62 | 55 |
| 104 | 62 | 56 |
| 105 | 62 | 42 |
| 106 | 63 | 50 |
| 107 | 63 | 46 |
| 108 | 63 | 43 |
| 109 | 63 | 48 |
| 110 | 63 | 52 |
| 111 | 63 | 54 |
| 112 | 64 | 42 |
| 113 | 64 | 46 |
| 114 | 65 | 48 |
| 115 | 65 | 50 |
| 116 | 65 | 43 |
| 117 | 65 | 59 |
| 118 | 67 | 43 |
| 119 | 67 | 57 |
| 120 | 67 | 56 |
| 121 | 67 | 40 |
| 122 | 69 | 58 |
| 123 | 69 | 91 |
| 124 | 70 | 29 |
| 125 | 70 | 77 |
| 126 | 71 | 35 |
| 127 | 71 | 95 |
| 128 | 71 | 11 |
| 129 | 71 | 75 |
| 130 | 71 | 9 |
| 131 | 71 | 75 |
| 132 | 72 | 34 |
| 133 | 72 | 71 |
| 134 | 73 | 5 |
| 135 | 73 | 88 |
| 136 | 73 | 7 |
| 137 | 73 | 73 |
| 138 | 74 | 10 |
| 139 | 74 | 72 |
| 140 | 75 | 5 |
| 141 | 75 | 93 |
| 142 | 76 | 40 |
| 143 | 76 | 87 |
| 144 | 77 | 12 |
| 145 | 77 | 97 |
| 146 | 77 | 36 |
| 147 | 77 | 74 |
| 148 | 78 | 22 |
| 149 | 78 | 90 |
| 150 | 78 | 17 |
| 151 | 78 | 88 |
| 152 | 78 | 20 |
| 153 | 78 | 76 |
| 154 | 78 | 16 |
| 155 | 78 | 89 |
| 156 | 78 | 1 |
| 157 | 78 | 78 |
| 158 | 78 | 1 |
| 159 | 78 | 73 |
| 160 | 79 | 35 |
| 161 | 79 | 83 |
| 162 | 81 | 5 |
| 163 | 81 | 93 |
| 164 | 85 | 26 |
| 165 | 85 | 75 |
| 166 | 86 | 20 |
| 167 | 86 | 95 |
| 168 | 87 | 27 |
| 169 | 87 | 63 |
| 170 | 87 | 13 |
| 171 | 87 | 75 |
| 172 | 87 | 10 |
| 173 | 87 | 92 |
| 174 | 88 | 13 |
| 175 | 88 | 86 |
| 176 | 88 | 15 |
| 177 | 88 | 69 |
| 178 | 93 | 14 |
| 179 | 93 | 90 |
| 180 | 97 | 32 |
| 181 | 97 | 86 |
| 182 | 98 | 15 |
| 183 | 98 | 88 |
| 184 | 99 | 39 |
| 185 | 99 | 97 |
| 186 | 101 | 24 |
| 187 | 101 | 68 |
| 188 | 103 | 17 |
| 189 | 103 | 85 |
| 190 | 103 | 23 |
| 191 | 103 | 69 |
| 192 | 113 | 8 |
| 193 | 113 | 91 |
| 194 | 120 | 16 |
| 195 | 120 | 79 |
| 196 | 126 | 28 |
| 197 | 126 | 74 |
| 198 | 137 | 18 |
| 199 | 137 | 83 |
Scaling the Annual Income and Spending Score values so that both features contribute equally to the clustering
# Scale both columns of d2; the result is a NumPy array, not a DataFrame.
# NOTE(review): `sc` is created earlier in the notebook (presumably a
# StandardScaler - confirm). Calling fit_transform here refits it on d2.
Data2 = sc.fit_transform(d2)
Data2
array([[-1.73899919, -0.43480148],
[-1.73899919, 1.19570407],
[-1.70082976, -1.71591298],
[-1.70082976, 1.04041783],
[-1.66266033, -0.39597992],
[-1.66266033, 1.00159627],
[-1.62449091, -1.71591298],
[-1.62449091, 1.70038436],
[-1.58632148, -1.83237767],
[-1.58632148, 0.84631002],
[-1.58632148, -1.4053405 ],
[-1.58632148, 1.89449216],
[-1.54815205, -1.36651894],
[-1.54815205, 1.04041783],
[-1.54815205, -1.44416206],
[-1.54815205, 1.11806095],
[-1.50998262, -0.59008772],
[-1.50998262, 0.61338066],
[-1.43364376, -0.82301709],
[-1.43364376, 1.8556706 ],
[-1.39547433, -0.59008772],
[-1.39547433, 0.88513158],
[-1.3573049 , -1.75473454],
[-1.3573049 , 0.88513158],
[-1.24279661, -1.4053405 ],
[-1.24279661, 1.23452563],
[-1.24279661, -0.7065524 ],
[-1.24279661, 0.41927286],
[-1.20462718, -0.74537397],
[-1.20462718, 1.42863343],
[-1.16645776, -1.7935561 ],
[-1.16645776, 0.88513158],
[-1.05194947, -1.7935561 ],
[-1.05194947, 1.62274124],
[-1.05194947, -1.4053405 ],
[-1.05194947, 1.19570407],
[-1.01378004, -1.28887582],
[-1.01378004, 0.88513158],
[-0.89927175, -0.93948177],
[-0.89927175, 0.96277471],
[-0.86110232, -0.59008772],
[-0.86110232, 1.62274124],
[-0.82293289, -0.55126616],
[-0.82293289, 0.41927286],
[-0.82293289, -0.86183865],
[-0.82293289, 0.5745591 ],
[-0.78476346, 0.18634349],
[-0.78476346, -0.12422899],
[-0.78476346, -0.3183368 ],
[-0.78476346, -0.3183368 ],
[-0.70842461, 0.06987881],
[-0.70842461, 0.38045129],
[-0.67025518, 0.14752193],
[-0.67025518, 0.38045129],
[-0.67025518, -0.20187212],
[-0.67025518, -0.35715836],
[-0.63208575, -0.00776431],
[-0.63208575, -0.16305055],
[-0.55574689, 0.03105725],
[-0.55574689, -0.16305055],
[-0.55574689, 0.22516505],
[-0.55574689, 0.18634349],
[-0.51757746, 0.06987881],
[-0.51757746, 0.34162973],
[-0.47940803, 0.03105725],
[-0.47940803, 0.34162973],
[-0.47940803, -0.00776431],
[-0.47940803, -0.08540743],
[-0.47940803, 0.34162973],
[-0.47940803, -0.12422899],
[-0.4412386 , 0.18634349],
[-0.4412386 , -0.3183368 ],
[-0.40306917, -0.04658587],
[-0.40306917, 0.22516505],
[-0.25039146, -0.12422899],
[-0.25039146, 0.14752193],
[-0.25039146, 0.10870037],
[-0.25039146, -0.08540743],
[-0.25039146, 0.06987881],
[-0.25039146, -0.3183368 ],
[-0.25039146, 0.03105725],
[-0.25039146, 0.18634349],
[-0.25039146, -0.35715836],
[-0.25039146, -0.24069368],
[-0.25039146, 0.26398661],
[-0.25039146, -0.16305055],
[-0.13588317, 0.30280817],
[-0.13588317, 0.18634349],
[-0.09771374, 0.38045129],
[-0.09771374, -0.16305055],
[-0.05954431, 0.18634349],
[-0.05954431, -0.35715836],
[-0.02137488, -0.04658587],
[-0.02137488, -0.39597992],
[-0.02137488, -0.3183368 ],
[-0.02137488, 0.06987881],
[-0.02137488, -0.12422899],
[-0.02137488, -0.00776431],
[ 0.01679455, -0.3183368 ],
[ 0.01679455, -0.04658587],
[ 0.05496398, -0.35715836],
[ 0.05496398, -0.08540743],
[ 0.05496398, 0.34162973],
[ 0.05496398, 0.18634349],
[ 0.05496398, 0.22516505],
[ 0.05496398, -0.3183368 ],
[ 0.09313341, -0.00776431],
[ 0.09313341, -0.16305055],
[ 0.09313341, -0.27951524],
[ 0.09313341, -0.08540743],
[ 0.09313341, 0.06987881],
[ 0.09313341, 0.14752193],
[ 0.13130284, -0.3183368 ],
[ 0.13130284, -0.16305055],
[ 0.16947227, -0.08540743],
[ 0.16947227, -0.00776431],
[ 0.16947227, -0.27951524],
[ 0.16947227, 0.34162973],
[ 0.24581112, -0.27951524],
[ 0.24581112, 0.26398661],
[ 0.24581112, 0.22516505],
[ 0.24581112, -0.39597992],
[ 0.32214998, 0.30280817],
[ 0.32214998, 1.58391968],
[ 0.36031941, -0.82301709],
[ 0.36031941, 1.04041783],
[ 0.39848884, -0.59008772],
[ 0.39848884, 1.73920592],
[ 0.39848884, -1.52180518],
[ 0.39848884, 0.96277471],
[ 0.39848884, -1.5994483 ],
[ 0.39848884, 0.96277471],
[ 0.43665827, -0.62890928],
[ 0.43665827, 0.80748846],
[ 0.4748277 , -1.75473454],
[ 0.4748277 , 1.46745499],
[ 0.4748277 , -1.67709142],
[ 0.4748277 , 0.88513158],
[ 0.51299713, -1.56062674],
[ 0.51299713, 0.84631002],
[ 0.55116656, -1.75473454],
[ 0.55116656, 1.6615628 ],
[ 0.58933599, -0.39597992],
[ 0.58933599, 1.42863343],
[ 0.62750542, -1.48298362],
[ 0.62750542, 1.81684904],
[ 0.62750542, -0.55126616],
[ 0.62750542, 0.92395314],
[ 0.66567484, -1.09476801],
[ 0.66567484, 1.54509812],
[ 0.66567484, -1.28887582],
[ 0.66567484, 1.46745499],
[ 0.66567484, -1.17241113],
[ 0.66567484, 1.00159627],
[ 0.66567484, -1.32769738],
[ 0.66567484, 1.50627656],
[ 0.66567484, -1.91002079],
[ 0.66567484, 1.07923939],
[ 0.66567484, -1.91002079],
[ 0.66567484, 0.88513158],
[ 0.70384427, -0.59008772],
[ 0.70384427, 1.27334719],
[ 0.78018313, -1.75473454],
[ 0.78018313, 1.6615628 ],
[ 0.93286085, -0.93948177],
[ 0.93286085, 0.96277471],
[ 0.97103028, -1.17241113],
[ 0.97103028, 1.73920592],
[ 1.00919971, -0.90066021],
[ 1.00919971, 0.49691598],
[ 1.00919971, -1.44416206],
[ 1.00919971, 0.96277471],
[ 1.00919971, -1.56062674],
[ 1.00919971, 1.62274124],
[ 1.04736914, -1.44416206],
[ 1.04736914, 1.38981187],
[ 1.04736914, -1.36651894],
[ 1.04736914, 0.72984534],
[ 1.23821628, -1.4053405 ],
[ 1.23821628, 1.54509812],
[ 1.390894 , -0.7065524 ],
[ 1.390894 , 1.38981187],
[ 1.42906343, -1.36651894],
[ 1.42906343, 1.46745499],
[ 1.46723286, -0.43480148],
[ 1.46723286, 1.81684904],
[ 1.54357172, -1.01712489],
[ 1.54357172, 0.69102378],
[ 1.61991057, -1.28887582],
[ 1.61991057, 1.35099031],
[ 1.61991057, -1.05594645],
[ 1.61991057, 0.72984534],
[ 2.00160487, -1.63826986],
[ 2.00160487, 1.58391968],
[ 2.26879087, -1.32769738],
[ 2.26879087, 1.11806095],
[ 2.49780745, -0.86183865],
[ 2.49780745, 0.92395314],
[ 2.91767117, -1.25005425],
[ 2.91767117, 1.27334719]])
Finding the number of clusters using the elbow method
# Elbow method: fit K-Means for k = 1..9 on the scaled income/spending data
# and record the within-cluster sum of squares (inertia) for each k.
wcss = []
for i in range(1, 10):
    # A fixed seed and an explicit n_init make the curve reproducible: the
    # initial centroids are random, and scikit-learn's default n_init is not
    # the same in every release.
    km = KMeans(n_clusters=i, n_init=10, random_state=42)
    km.fit(Data2)  # only the inertia is needed here, not the labels
    wcss.append(km.inertia_)
wcss
[400.0, 269.0167937490666, 157.70400815035947, 108.92131661364357, 65.56840815571681, 55.10377812115057, 44.91271612308711, 37.19952387597709, 32.345837779566594]
# x-axis values: the candidate cluster counts 1..9, one per entry of wcss.
c = range(1,10)
# Plot WCSS against k; the "elbow" is where the curve stops dropping sharply.
px.line(x=c,y=wcss)
From the graph above, the elbow of the WCSS curve is at k = 5, so 5 clusters are used.
Building the model
# Final income/spending model with k = 5, the value read off the elbow plot.
# K-Means cluster ids are arbitrary and depend on the random initialisation,
# so the seed is fixed (and n_init pinned) to keep the label numbering stable
# between runs; otherwise the tables and plots built from it reshuffle.
km = KMeans(n_clusters=5, n_init=10, random_state=42)
# One integer label (0..4) per row of Data2, in row order.
prediction2 = km.fit_predict(Data2)
prediction2
array([4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3,
4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 1,
4, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 2, 0, 1, 0, 2, 0, 2, 0,
1, 0, 2, 0, 2, 0, 2, 0, 2, 0, 1, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0,
2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0,
2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0,
2, 0])
# Attach the K-Means labels to the main frame as a new column (in place).
# NOTE(review): the column is called 'salary_cluster', but the labels come from
# clustering on both Annual Income and Spending Score, not income alone.
data['salary_cluster'] = prediction2
data
| Gender | Age | Annual Income (k$) | Spending Score (1-100) | age_cluster | salary_cluster | |
|---|---|---|---|---|---|---|
| 0 | Male | 19 | 15 | 39 | 2 | 4 |
| 1 | Male | 21 | 15 | 81 | 0 | 3 |
| 2 | Female | 20 | 16 | 6 | 2 | 4 |
| 3 | Female | 23 | 16 | 77 | 0 | 3 |
| 4 | Female | 31 | 17 | 40 | 2 | 4 |
| 5 | Female | 22 | 17 | 76 | 0 | 3 |
| 6 | Female | 35 | 18 | 6 | 1 | 4 |
| 7 | Female | 23 | 18 | 94 | 0 | 3 |
| 8 | Male | 64 | 19 | 3 | 1 | 4 |
| 9 | Female | 30 | 19 | 72 | 0 | 3 |
| 10 | Male | 67 | 19 | 14 | 1 | 4 |
| 11 | Female | 35 | 19 | 99 | 0 | 3 |
| 12 | Female | 58 | 20 | 15 | 1 | 4 |
| 13 | Female | 24 | 20 | 77 | 0 | 3 |
| 14 | Male | 37 | 20 | 13 | 1 | 4 |
| 15 | Male | 22 | 20 | 79 | 0 | 3 |
| 16 | Female | 35 | 21 | 35 | 2 | 4 |
| 17 | Male | 20 | 21 | 66 | 0 | 3 |
| 18 | Male | 52 | 23 | 29 | 1 | 4 |
| 19 | Female | 35 | 23 | 98 | 0 | 3 |
| 20 | Male | 35 | 24 | 35 | 2 | 4 |
| 21 | Male | 25 | 24 | 73 | 0 | 3 |
| 22 | Female | 46 | 25 | 5 | 1 | 4 |
| 23 | Male | 31 | 25 | 73 | 0 | 3 |
| 24 | Female | 54 | 28 | 14 | 1 | 4 |
| 25 | Male | 29 | 28 | 82 | 0 | 3 |
| 26 | Female | 45 | 28 | 32 | 1 | 4 |
| 27 | Male | 35 | 28 | 61 | 0 | 3 |
| 28 | Female | 40 | 29 | 31 | 1 | 4 |
| 29 | Female | 23 | 29 | 87 | 0 | 3 |
| 30 | Male | 60 | 30 | 4 | 1 | 4 |
| 31 | Female | 21 | 30 | 73 | 0 | 3 |
| 32 | Male | 53 | 33 | 4 | 1 | 4 |
| 33 | Male | 18 | 33 | 92 | 0 | 3 |
| 34 | Female | 49 | 33 | 14 | 1 | 4 |
| 35 | Female | 21 | 33 | 81 | 0 | 3 |
| 36 | Female | 42 | 34 | 17 | 1 | 4 |
| 37 | Female | 30 | 34 | 73 | 0 | 3 |
| 38 | Female | 36 | 37 | 26 | 2 | 4 |
| 39 | Female | 20 | 37 | 75 | 0 | 3 |
| 40 | Female | 65 | 38 | 35 | 3 | 4 |
| 41 | Male | 24 | 38 | 92 | 0 | 3 |
| 42 | Male | 48 | 39 | 36 | 3 | 4 |
| 43 | Female | 31 | 39 | 61 | 0 | 1 |
| 44 | Female | 49 | 39 | 28 | 1 | 4 |
| 45 | Female | 24 | 39 | 65 | 0 | 3 |
| 46 | Female | 50 | 40 | 55 | 3 | 1 |
| 47 | Female | 27 | 40 | 47 | 2 | 1 |
| 48 | Female | 29 | 40 | 42 | 2 | 1 |
| 49 | Female | 31 | 40 | 42 | 2 | 1 |
| 50 | Female | 49 | 42 | 52 | 3 | 1 |
| 51 | Male | 33 | 42 | 60 | 0 | 1 |
| 52 | Female | 31 | 43 | 54 | 2 | 1 |
| 53 | Male | 59 | 43 | 60 | 3 | 1 |
| 54 | Female | 50 | 43 | 45 | 3 | 1 |
| 55 | Male | 47 | 43 | 41 | 3 | 1 |
| 56 | Female | 51 | 44 | 50 | 3 | 1 |
| 57 | Male | 69 | 44 | 46 | 3 | 1 |
| 58 | Female | 27 | 46 | 51 | 2 | 1 |
| 59 | Male | 53 | 46 | 46 | 3 | 1 |
| 60 | Male | 70 | 46 | 56 | 3 | 1 |
| 61 | Male | 19 | 46 | 55 | 2 | 1 |
| 62 | Female | 67 | 47 | 52 | 3 | 1 |
| 63 | Female | 54 | 47 | 59 | 3 | 1 |
| 64 | Male | 63 | 48 | 51 | 3 | 1 |
| 65 | Male | 18 | 48 | 59 | 2 | 1 |
| 66 | Female | 43 | 48 | 50 | 3 | 1 |
| 67 | Female | 68 | 48 | 48 | 3 | 1 |
| 68 | Male | 19 | 48 | 59 | 2 | 1 |
| 69 | Female | 32 | 48 | 47 | 2 | 1 |
| 70 | Male | 70 | 49 | 55 | 3 | 1 |
| 71 | Female | 47 | 49 | 42 | 3 | 1 |
| 72 | Female | 60 | 50 | 49 | 3 | 1 |
| 73 | Female | 60 | 50 | 56 | 3 | 1 |
| 74 | Male | 59 | 54 | 47 | 3 | 1 |
| 75 | Male | 26 | 54 | 54 | 2 | 1 |
| 76 | Female | 45 | 54 | 53 | 3 | 1 |
| 77 | Male | 40 | 54 | 48 | 2 | 1 |
| 78 | Female | 23 | 54 | 52 | 2 | 1 |
| 79 | Female | 49 | 54 | 42 | 3 | 1 |
| 80 | Male | 57 | 54 | 51 | 3 | 1 |
| 81 | Male | 38 | 54 | 55 | 2 | 1 |
| 82 | Male | 67 | 54 | 41 | 3 | 1 |
| 83 | Female | 46 | 54 | 44 | 3 | 1 |
| 84 | Female | 21 | 54 | 57 | 2 | 1 |
| 85 | Male | 48 | 54 | 46 | 3 | 1 |
| 86 | Female | 55 | 57 | 58 | 3 | 1 |
| 87 | Female | 22 | 57 | 55 | 2 | 1 |
| 88 | Female | 34 | 58 | 60 | 0 | 1 |
| 89 | Female | 50 | 58 | 46 | 3 | 1 |
| 90 | Female | 68 | 59 | 55 | 3 | 1 |
| 91 | Male | 18 | 59 | 41 | 2 | 1 |
| 92 | Male | 48 | 60 | 49 | 3 | 1 |
| 93 | Female | 40 | 60 | 40 | 2 | 1 |
| 94 | Female | 32 | 60 | 42 | 2 | 1 |
| 95 | Male | 24 | 60 | 52 | 2 | 1 |
| 96 | Female | 47 | 60 | 47 | 3 | 1 |
| 97 | Female | 27 | 60 | 50 | 2 | 1 |
| 98 | Male | 48 | 61 | 42 | 3 | 1 |
| 99 | Male | 20 | 61 | 49 | 2 | 1 |
| 100 | Female | 23 | 62 | 41 | 2 | 1 |
| 101 | Female | 49 | 62 | 48 | 3 | 1 |
| 102 | Male | 67 | 62 | 59 | 3 | 1 |
| 103 | Male | 26 | 62 | 55 | 2 | 1 |
| 104 | Male | 49 | 62 | 56 | 3 | 1 |
| 105 | Female | 21 | 62 | 42 | 2 | 1 |
| 106 | Female | 66 | 63 | 50 | 3 | 1 |
| 107 | Male | 54 | 63 | 46 | 3 | 1 |
| 108 | Male | 68 | 63 | 43 | 3 | 1 |
| 109 | Male | 66 | 63 | 48 | 3 | 1 |
| 110 | Male | 65 | 63 | 52 | 3 | 1 |
| 111 | Female | 19 | 63 | 54 | 2 | 1 |
| 112 | Female | 38 | 64 | 42 | 2 | 1 |
| 113 | Male | 19 | 64 | 46 | 2 | 1 |
| 114 | Female | 18 | 65 | 48 | 2 | 1 |
| 115 | Female | 19 | 65 | 50 | 2 | 1 |
| 116 | Female | 63 | 65 | 43 | 3 | 1 |
| 117 | Female | 49 | 65 | 59 | 3 | 1 |
| 118 | Female | 51 | 67 | 43 | 3 | 1 |
| 119 | Female | 50 | 67 | 57 | 3 | 1 |
| 120 | Male | 27 | 67 | 56 | 2 | 1 |
| 121 | Female | 38 | 67 | 40 | 2 | 1 |
| 122 | Female | 40 | 69 | 58 | 0 | 1 |
| 123 | Male | 39 | 69 | 91 | 0 | 0 |
| 124 | Female | 23 | 70 | 29 | 2 | 2 |
| 125 | Female | 31 | 70 | 77 | 0 | 0 |
| 126 | Male | 43 | 71 | 35 | 1 | 1 |
| 127 | Male | 40 | 71 | 95 | 0 | 0 |
| 128 | Male | 59 | 71 | 11 | 1 | 2 |
| 129 | Male | 38 | 71 | 75 | 0 | 0 |
| 130 | Male | 47 | 71 | 9 | 1 | 2 |
| 131 | Male | 39 | 71 | 75 | 0 | 0 |
| 132 | Female | 25 | 72 | 34 | 2 | 1 |
| 133 | Female | 31 | 72 | 71 | 0 | 0 |
| 134 | Male | 20 | 73 | 5 | 2 | 2 |
| 135 | Female | 29 | 73 | 88 | 0 | 0 |
| 136 | Female | 44 | 73 | 7 | 1 | 2 |
| 137 | Male | 32 | 73 | 73 | 0 | 0 |
| 138 | Male | 19 | 74 | 10 | 2 | 2 |
| 139 | Female | 35 | 74 | 72 | 0 | 0 |
| 140 | Female | 57 | 75 | 5 | 1 | 2 |
| 141 | Male | 32 | 75 | 93 | 0 | 0 |
| 142 | Female | 28 | 76 | 40 | 2 | 1 |
| 143 | Female | 32 | 76 | 87 | 0 | 0 |
| 144 | Male | 25 | 77 | 12 | 2 | 2 |
| 145 | Male | 28 | 77 | 97 | 0 | 0 |
| 146 | Male | 48 | 77 | 36 | 3 | 2 |
| 147 | Female | 32 | 77 | 74 | 0 | 0 |
| 148 | Female | 34 | 78 | 22 | 2 | 2 |
| 149 | Male | 34 | 78 | 90 | 0 | 0 |
| 150 | Male | 43 | 78 | 17 | 1 | 2 |
| 151 | Male | 39 | 78 | 88 | 0 | 0 |
| 152 | Female | 44 | 78 | 20 | 1 | 2 |
| 153 | Female | 38 | 78 | 76 | 0 | 0 |
| 154 | Female | 47 | 78 | 16 | 1 | 2 |
| 155 | Female | 27 | 78 | 89 | 0 | 0 |
| 156 | Male | 37 | 78 | 1 | 1 | 2 |
| 157 | Female | 30 | 78 | 78 | 0 | 0 |
| 158 | Male | 34 | 78 | 1 | 1 | 2 |
| 159 | Female | 30 | 78 | 73 | 0 | 0 |
| 160 | Female | 56 | 79 | 35 | 3 | 2 |
| 161 | Female | 29 | 79 | 83 | 0 | 0 |
| 162 | Male | 19 | 81 | 5 | 2 | 2 |
| 163 | Female | 31 | 81 | 93 | 0 | 0 |
| 164 | Male | 50 | 85 | 26 | 1 | 2 |
| 165 | Female | 36 | 85 | 75 | 0 | 0 |
| 166 | Male | 42 | 86 | 20 | 1 | 2 |
| 167 | Female | 33 | 86 | 95 | 0 | 0 |
| 168 | Female | 36 | 87 | 27 | 2 | 2 |
| 169 | Male | 32 | 87 | 63 | 0 | 0 |
| 170 | Male | 40 | 87 | 13 | 1 | 2 |
| 171 | Male | 28 | 87 | 75 | 0 | 0 |
| 172 | Male | 36 | 87 | 10 | 1 | 2 |
| 173 | Male | 36 | 87 | 92 | 0 | 0 |
| 174 | Female | 52 | 88 | 13 | 1 | 2 |
| 175 | Female | 30 | 88 | 86 | 0 | 0 |
| 176 | Male | 58 | 88 | 15 | 1 | 2 |
| 177 | Male | 27 | 88 | 69 | 0 | 0 |
| 178 | Male | 59 | 93 | 14 | 1 | 2 |
| 179 | Male | 35 | 93 | 90 | 0 | 0 |
| 180 | Female | 37 | 97 | 32 | 2 | 2 |
| 181 | Female | 32 | 97 | 86 | 0 | 0 |
| 182 | Male | 46 | 98 | 15 | 1 | 2 |
| 183 | Female | 29 | 98 | 88 | 0 | 0 |
| 184 | Female | 41 | 99 | 39 | 2 | 2 |
| 185 | Male | 30 | 99 | 97 | 0 | 0 |
| 186 | Female | 54 | 101 | 24 | 1 | 2 |
| 187 | Male | 28 | 101 | 68 | 0 | 0 |
| 188 | Female | 41 | 103 | 17 | 1 | 2 |
| 189 | Female | 36 | 103 | 85 | 0 | 0 |
| 190 | Female | 34 | 103 | 23 | 2 | 2 |
| 191 | Female | 32 | 103 | 69 | 0 | 0 |
| 192 | Male | 33 | 113 | 8 | 1 | 2 |
| 193 | Female | 38 | 113 | 91 | 0 | 0 |
| 194 | Female | 47 | 120 | 16 | 1 | 2 |
| 195 | Female | 35 | 120 | 79 | 0 | 0 |
| 196 | Female | 45 | 126 | 28 | 1 | 2 |
| 197 | Male | 32 | 126 | 74 | 0 | 0 |
| 198 | Male | 32 | 137 | 18 | 2 | 2 |
| 199 | Male | 30 | 137 | 83 | 0 | 0 |
# Split the customers by their income/spending cluster label so each group
# can be drawn as its own trace. The names ds1..ds5 are kept unchanged.
ds1 = data[data['salary_cluster']==0]
ds2 = data[data['salary_cluster']==1]
ds3 = data[data['salary_cluster']==2]
ds4 = data[data['salary_cluster']==3]
ds5 = data[data['salary_cluster']==4]

# One scatter trace per cluster. Naming each trace gives the legend readable
# entries instead of plotly's default "trace 0", "trace 1", ...
fig = go.Figure()
for cluster_id, subset in enumerate((ds1, ds2, ds3, ds4, ds5)):
    fig.add_trace(go.Scatter(
        x=subset['Annual Income (k$)'],
        y=subset['Spending Score (1-100)'],
        mode="markers",
        name=f"Cluster {cluster_id}",
    ))
# update_layout returns the figure, so as the last expression of the cell it
# is still what the notebook renders.
fig.update_layout(
    title="Annual Income vs Spending Score by salary_cluster",
    xaxis_title="Annual Income (k$)",
    yaxis_title="Spending Score (1-100)",
)
Customer segmentation according to Age, Annual Income and Spending Score
# Keep the three numeric features used for the combined segmentation.
d3 = data.loc[:, ['Age', 'Annual Income (k$)', 'Spending Score (1-100)']]
d3
| Age | Annual Income (k$) | Spending Score (1-100) | |
|---|---|---|---|
| 0 | 19 | 15 | 39 |
| 1 | 21 | 15 | 81 |
| 2 | 20 | 16 | 6 |
| 3 | 23 | 16 | 77 |
| 4 | 31 | 17 | 40 |
| 5 | 22 | 17 | 76 |
| 6 | 35 | 18 | 6 |
| 7 | 23 | 18 | 94 |
| 8 | 64 | 19 | 3 |
| 9 | 30 | 19 | 72 |
| 10 | 67 | 19 | 14 |
| 11 | 35 | 19 | 99 |
| 12 | 58 | 20 | 15 |
| 13 | 24 | 20 | 77 |
| 14 | 37 | 20 | 13 |
| 15 | 22 | 20 | 79 |
| 16 | 35 | 21 | 35 |
| 17 | 20 | 21 | 66 |
| 18 | 52 | 23 | 29 |
| 19 | 35 | 23 | 98 |
| 20 | 35 | 24 | 35 |
| 21 | 25 | 24 | 73 |
| 22 | 46 | 25 | 5 |
| 23 | 31 | 25 | 73 |
| 24 | 54 | 28 | 14 |
| 25 | 29 | 28 | 82 |
| 26 | 45 | 28 | 32 |
| 27 | 35 | 28 | 61 |
| 28 | 40 | 29 | 31 |
| 29 | 23 | 29 | 87 |
| 30 | 60 | 30 | 4 |
| 31 | 21 | 30 | 73 |
| 32 | 53 | 33 | 4 |
| 33 | 18 | 33 | 92 |
| 34 | 49 | 33 | 14 |
| 35 | 21 | 33 | 81 |
| 36 | 42 | 34 | 17 |
| 37 | 30 | 34 | 73 |
| 38 | 36 | 37 | 26 |
| 39 | 20 | 37 | 75 |
| 40 | 65 | 38 | 35 |
| 41 | 24 | 38 | 92 |
| 42 | 48 | 39 | 36 |
| 43 | 31 | 39 | 61 |
| 44 | 49 | 39 | 28 |
| 45 | 24 | 39 | 65 |
| 46 | 50 | 40 | 55 |
| 47 | 27 | 40 | 47 |
| 48 | 29 | 40 | 42 |
| 49 | 31 | 40 | 42 |
| 50 | 49 | 42 | 52 |
| 51 | 33 | 42 | 60 |
| 52 | 31 | 43 | 54 |
| 53 | 59 | 43 | 60 |
| 54 | 50 | 43 | 45 |
| 55 | 47 | 43 | 41 |
| 56 | 51 | 44 | 50 |
| 57 | 69 | 44 | 46 |
| 58 | 27 | 46 | 51 |
| 59 | 53 | 46 | 46 |
| 60 | 70 | 46 | 56 |
| 61 | 19 | 46 | 55 |
| 62 | 67 | 47 | 52 |
| 63 | 54 | 47 | 59 |
| 64 | 63 | 48 | 51 |
| 65 | 18 | 48 | 59 |
| 66 | 43 | 48 | 50 |
| 67 | 68 | 48 | 48 |
| 68 | 19 | 48 | 59 |
| 69 | 32 | 48 | 47 |
| 70 | 70 | 49 | 55 |
| 71 | 47 | 49 | 42 |
| 72 | 60 | 50 | 49 |
| 73 | 60 | 50 | 56 |
| 74 | 59 | 54 | 47 |
| 75 | 26 | 54 | 54 |
| 76 | 45 | 54 | 53 |
| 77 | 40 | 54 | 48 |
| 78 | 23 | 54 | 52 |
| 79 | 49 | 54 | 42 |
| 80 | 57 | 54 | 51 |
| 81 | 38 | 54 | 55 |
| 82 | 67 | 54 | 41 |
| 83 | 46 | 54 | 44 |
| 84 | 21 | 54 | 57 |
| 85 | 48 | 54 | 46 |
| 86 | 55 | 57 | 58 |
| 87 | 22 | 57 | 55 |
| 88 | 34 | 58 | 60 |
| 89 | 50 | 58 | 46 |
| 90 | 68 | 59 | 55 |
| 91 | 18 | 59 | 41 |
| 92 | 48 | 60 | 49 |
| 93 | 40 | 60 | 40 |
| 94 | 32 | 60 | 42 |
| 95 | 24 | 60 | 52 |
| 96 | 47 | 60 | 47 |
| 97 | 27 | 60 | 50 |
| 98 | 48 | 61 | 42 |
| 99 | 20 | 61 | 49 |
| 100 | 23 | 62 | 41 |
| 101 | 49 | 62 | 48 |
| 102 | 67 | 62 | 59 |
| 103 | 26 | 62 | 55 |
| 104 | 49 | 62 | 56 |
| 105 | 21 | 62 | 42 |
| 106 | 66 | 63 | 50 |
| 107 | 54 | 63 | 46 |
| 108 | 68 | 63 | 43 |
| 109 | 66 | 63 | 48 |
| 110 | 65 | 63 | 52 |
| 111 | 19 | 63 | 54 |
| 112 | 38 | 64 | 42 |
| 113 | 19 | 64 | 46 |
| 114 | 18 | 65 | 48 |
| 115 | 19 | 65 | 50 |
| 116 | 63 | 65 | 43 |
| 117 | 49 | 65 | 59 |
| 118 | 51 | 67 | 43 |
| 119 | 50 | 67 | 57 |
| 120 | 27 | 67 | 56 |
| 121 | 38 | 67 | 40 |
| 122 | 40 | 69 | 58 |
| 123 | 39 | 69 | 91 |
| 124 | 23 | 70 | 29 |
| 125 | 31 | 70 | 77 |
| 126 | 43 | 71 | 35 |
| 127 | 40 | 71 | 95 |
| 128 | 59 | 71 | 11 |
| 129 | 38 | 71 | 75 |
| 130 | 47 | 71 | 9 |
| 131 | 39 | 71 | 75 |
| 132 | 25 | 72 | 34 |
| 133 | 31 | 72 | 71 |
| 134 | 20 | 73 | 5 |
| 135 | 29 | 73 | 88 |
| 136 | 44 | 73 | 7 |
| 137 | 32 | 73 | 73 |
| 138 | 19 | 74 | 10 |
| 139 | 35 | 74 | 72 |
| 140 | 57 | 75 | 5 |
| 141 | 32 | 75 | 93 |
| 142 | 28 | 76 | 40 |
| 143 | 32 | 76 | 87 |
| 144 | 25 | 77 | 12 |
| 145 | 28 | 77 | 97 |
| 146 | 48 | 77 | 36 |
| 147 | 32 | 77 | 74 |
| 148 | 34 | 78 | 22 |
| 149 | 34 | 78 | 90 |
| 150 | 43 | 78 | 17 |
| 151 | 39 | 78 | 88 |
| 152 | 44 | 78 | 20 |
| 153 | 38 | 78 | 76 |
| 154 | 47 | 78 | 16 |
| 155 | 27 | 78 | 89 |
| 156 | 37 | 78 | 1 |
| 157 | 30 | 78 | 78 |
| 158 | 34 | 78 | 1 |
| 159 | 30 | 78 | 73 |
| 160 | 56 | 79 | 35 |
| 161 | 29 | 79 | 83 |
| 162 | 19 | 81 | 5 |
| 163 | 31 | 81 | 93 |
| 164 | 50 | 85 | 26 |
| 165 | 36 | 85 | 75 |
| 166 | 42 | 86 | 20 |
| 167 | 33 | 86 | 95 |
| 168 | 36 | 87 | 27 |
| 169 | 32 | 87 | 63 |
| 170 | 40 | 87 | 13 |
| 171 | 28 | 87 | 75 |
| 172 | 36 | 87 | 10 |
| 173 | 36 | 87 | 92 |
| 174 | 52 | 88 | 13 |
| 175 | 30 | 88 | 86 |
| 176 | 58 | 88 | 15 |
| 177 | 27 | 88 | 69 |
| 178 | 59 | 93 | 14 |
| 179 | 35 | 93 | 90 |
| 180 | 37 | 97 | 32 |
| 181 | 32 | 97 | 86 |
| 182 | 46 | 98 | 15 |
| 183 | 29 | 98 | 88 |
| 184 | 41 | 99 | 39 |
| 185 | 30 | 99 | 97 |
| 186 | 54 | 101 | 24 |
| 187 | 28 | 101 | 68 |
| 188 | 41 | 103 | 17 |
| 189 | 36 | 103 | 85 |
| 190 | 34 | 103 | 23 |
| 191 | 32 | 103 | 69 |
| 192 | 33 | 113 | 8 |
| 193 | 38 | 113 | 91 |
| 194 | 47 | 120 | 16 |
| 195 | 35 | 120 | 79 |
| 196 | 45 | 126 | 28 |
| 197 | 32 | 126 | 74 |
| 198 | 32 | 137 | 18 |
| 199 | 30 | 137 | 83 |
Scaling the values of d3
# Scale the three columns of d3; the result is a NumPy array, not a DataFrame.
# NOTE(review): `sc` is created earlier in the notebook (presumably a
# StandardScaler - confirm). Calling fit_transform here refits it on d3.
Data3 = sc.fit_transform(d3)
Data3
array([[-1.42456879, -1.73899919, -0.43480148],
[-1.28103541, -1.73899919, 1.19570407],
[-1.3528021 , -1.70082976, -1.71591298],
[-1.13750203, -1.70082976, 1.04041783],
[-0.56336851, -1.66266033, -0.39597992],
[-1.20926872, -1.66266033, 1.00159627],
[-0.27630176, -1.62449091, -1.71591298],
[-1.13750203, -1.62449091, 1.70038436],
[ 1.80493225, -1.58632148, -1.83237767],
[-0.6351352 , -1.58632148, 0.84631002],
[ 2.02023231, -1.58632148, -1.4053405 ],
[-0.27630176, -1.58632148, 1.89449216],
[ 1.37433211, -1.54815205, -1.36651894],
[-1.06573534, -1.54815205, 1.04041783],
[-0.13276838, -1.54815205, -1.44416206],
[-1.20926872, -1.54815205, 1.11806095],
[-0.27630176, -1.50998262, -0.59008772],
[-1.3528021 , -1.50998262, 0.61338066],
[ 0.94373197, -1.43364376, -0.82301709],
[-0.27630176, -1.43364376, 1.8556706 ],
[-0.27630176, -1.39547433, -0.59008772],
[-0.99396865, -1.39547433, 0.88513158],
[ 0.51313183, -1.3573049 , -1.75473454],
[-0.56336851, -1.3573049 , 0.88513158],
[ 1.08726535, -1.24279661, -1.4053405 ],
[-0.70690189, -1.24279661, 1.23452563],
[ 0.44136514, -1.24279661, -0.7065524 ],
[-0.27630176, -1.24279661, 0.41927286],
[ 0.08253169, -1.20462718, -0.74537397],
[-1.13750203, -1.20462718, 1.42863343],
[ 1.51786549, -1.16645776, -1.7935561 ],
[-1.28103541, -1.16645776, 0.88513158],
[ 1.01549866, -1.05194947, -1.7935561 ],
[-1.49633548, -1.05194947, 1.62274124],
[ 0.7284319 , -1.05194947, -1.4053405 ],
[-1.28103541, -1.05194947, 1.19570407],
[ 0.22606507, -1.01378004, -1.28887582],
[-0.6351352 , -1.01378004, 0.88513158],
[-0.20453507, -0.89927175, -0.93948177],
[-1.3528021 , -0.89927175, 0.96277471],
[ 1.87669894, -0.86110232, -0.59008772],
[-1.06573534, -0.86110232, 1.62274124],
[ 0.65666521, -0.82293289, -0.55126616],
[-0.56336851, -0.82293289, 0.41927286],
[ 0.7284319 , -0.82293289, -0.86183865],
[-1.06573534, -0.82293289, 0.5745591 ],
[ 0.80019859, -0.78476346, 0.18634349],
[-0.85043527, -0.78476346, -0.12422899],
[-0.70690189, -0.78476346, -0.3183368 ],
[-0.56336851, -0.78476346, -0.3183368 ],
[ 0.7284319 , -0.70842461, 0.06987881],
[-0.41983513, -0.70842461, 0.38045129],
[-0.56336851, -0.67025518, 0.14752193],
[ 1.4460988 , -0.67025518, 0.38045129],
[ 0.80019859, -0.67025518, -0.20187212],
[ 0.58489852, -0.67025518, -0.35715836],
[ 0.87196528, -0.63208575, -0.00776431],
[ 2.16376569, -0.63208575, -0.16305055],
[-0.85043527, -0.55574689, 0.03105725],
[ 1.01549866, -0.55574689, -0.16305055],
[ 2.23553238, -0.55574689, 0.22516505],
[-1.42456879, -0.55574689, 0.18634349],
[ 2.02023231, -0.51757746, 0.06987881],
[ 1.08726535, -0.51757746, 0.34162973],
[ 1.73316556, -0.47940803, 0.03105725],
[-1.49633548, -0.47940803, 0.34162973],
[ 0.29783176, -0.47940803, -0.00776431],
[ 2.091999 , -0.47940803, -0.08540743],
[-1.42456879, -0.47940803, 0.34162973],
[-0.49160182, -0.47940803, -0.12422899],
[ 2.23553238, -0.4412386 , 0.18634349],
[ 0.58489852, -0.4412386 , -0.3183368 ],
[ 1.51786549, -0.40306917, -0.04658587],
[ 1.51786549, -0.40306917, 0.22516505],
[ 1.4460988 , -0.25039146, -0.12422899],
[-0.92220196, -0.25039146, 0.14752193],
[ 0.44136514, -0.25039146, 0.10870037],
[ 0.08253169, -0.25039146, -0.08540743],
[-1.13750203, -0.25039146, 0.06987881],
[ 0.7284319 , -0.25039146, -0.3183368 ],
[ 1.30256542, -0.25039146, 0.03105725],
[-0.06100169, -0.25039146, 0.18634349],
[ 2.02023231, -0.25039146, -0.35715836],
[ 0.51313183, -0.25039146, -0.24069368],
[-1.28103541, -0.25039146, 0.26398661],
[ 0.65666521, -0.25039146, -0.16305055],
[ 1.15903204, -0.13588317, 0.30280817],
[-1.20926872, -0.13588317, 0.18634349],
[-0.34806844, -0.09771374, 0.38045129],
[ 0.80019859, -0.09771374, -0.16305055],
[ 2.091999 , -0.05954431, 0.18634349],
[-1.49633548, -0.05954431, -0.35715836],
[ 0.65666521, -0.02137488, -0.04658587],
[ 0.08253169, -0.02137488, -0.39597992],
[-0.49160182, -0.02137488, -0.3183368 ],
[-1.06573534, -0.02137488, 0.06987881],
[ 0.58489852, -0.02137488, -0.12422899],
[-0.85043527, -0.02137488, -0.00776431],
[ 0.65666521, 0.01679455, -0.3183368 ],
[-1.3528021 , 0.01679455, -0.04658587],
[-1.13750203, 0.05496398, -0.35715836],
[ 0.7284319 , 0.05496398, -0.08540743],
[ 2.02023231, 0.05496398, 0.34162973],
[-0.92220196, 0.05496398, 0.18634349],
[ 0.7284319 , 0.05496398, 0.22516505],
[-1.28103541, 0.05496398, -0.3183368 ],
[ 1.94846562, 0.09313341, -0.00776431],
[ 1.08726535, 0.09313341, -0.16305055],
[ 2.091999 , 0.09313341, -0.27951524],
[ 1.94846562, 0.09313341, -0.08540743],
[ 1.87669894, 0.09313341, 0.06987881],
[-1.42456879, 0.09313341, 0.14752193],
[-0.06100169, 0.13130284, -0.3183368 ],
[-1.42456879, 0.13130284, -0.16305055],
[-1.49633548, 0.16947227, -0.08540743],
[-1.42456879, 0.16947227, -0.00776431],
[ 1.73316556, 0.16947227, -0.27951524],
[ 0.7284319 , 0.16947227, 0.34162973],
[ 0.87196528, 0.24581112, -0.27951524],
[ 0.80019859, 0.24581112, 0.26398661],
[-0.85043527, 0.24581112, 0.22516505],
[-0.06100169, 0.24581112, -0.39597992],
[ 0.08253169, 0.32214998, 0.30280817],
[ 0.010765 , 0.32214998, 1.58391968],
[-1.13750203, 0.36031941, -0.82301709],
[-0.56336851, 0.36031941, 1.04041783],
[ 0.29783176, 0.39848884, -0.59008772],
[ 0.08253169, 0.39848884, 1.73920592],
[ 1.4460988 , 0.39848884, -1.52180518],
[-0.06100169, 0.39848884, 0.96277471],
[ 0.58489852, 0.39848884, -1.5994483 ],
[ 0.010765 , 0.39848884, 0.96277471],
[-0.99396865, 0.43665827, -0.62890928],
[-0.56336851, 0.43665827, 0.80748846],
[-1.3528021 , 0.4748277 , -1.75473454],
[-0.70690189, 0.4748277 , 1.46745499],
[ 0.36959845, 0.4748277 , -1.67709142],
[-0.49160182, 0.4748277 , 0.88513158],
[-1.42456879, 0.51299713, -1.56062674],
[-0.27630176, 0.51299713, 0.84631002],
[ 1.30256542, 0.55116656, -1.75473454],
[-0.49160182, 0.55116656, 1.6615628 ],
[-0.77866858, 0.58933599, -0.39597992],
[-0.49160182, 0.58933599, 1.42863343],
[-0.99396865, 0.62750542, -1.48298362],
[-0.77866858, 0.62750542, 1.81684904],
[ 0.65666521, 0.62750542, -0.55126616],
[-0.49160182, 0.62750542, 0.92395314],
[-0.34806844, 0.66567484, -1.09476801],
[-0.34806844, 0.66567484, 1.54509812],
[ 0.29783176, 0.66567484, -1.28887582],
[ 0.010765 , 0.66567484, 1.46745499],
[ 0.36959845, 0.66567484, -1.17241113],
[-0.06100169, 0.66567484, 1.00159627],
[ 0.58489852, 0.66567484, -1.32769738],
[-0.85043527, 0.66567484, 1.50627656],
[-0.13276838, 0.66567484, -1.91002079],
[-0.6351352 , 0.66567484, 1.07923939],
[-0.34806844, 0.66567484, -1.91002079],
[-0.6351352 , 0.66567484, 0.88513158],
[ 1.23079873, 0.70384427, -0.59008772],
[-0.70690189, 0.70384427, 1.27334719],
[-1.42456879, 0.78018313, -1.75473454],
[-0.56336851, 0.78018313, 1.6615628 ],
[ 0.80019859, 0.93286085, -0.93948177],
[-0.20453507, 0.93286085, 0.96277471],
[ 0.22606507, 0.97103028, -1.17241113],
[-0.41983513, 0.97103028, 1.73920592],
[-0.20453507, 1.00919971, -0.90066021],
[-0.49160182, 1.00919971, 0.49691598],
[ 0.08253169, 1.00919971, -1.44416206],
[-0.77866858, 1.00919971, 0.96277471],
[-0.20453507, 1.00919971, -1.56062674],
[-0.20453507, 1.00919971, 1.62274124],
[ 0.94373197, 1.04736914, -1.44416206],
[-0.6351352 , 1.04736914, 1.38981187],
[ 1.37433211, 1.04736914, -1.36651894],
[-0.85043527, 1.04736914, 0.72984534],
[ 1.4460988 , 1.23821628, -1.4053405 ],
[-0.27630176, 1.23821628, 1.54509812],
[-0.13276838, 1.390894 , -0.7065524 ],
[-0.49160182, 1.390894 , 1.38981187],
[ 0.51313183, 1.42906343, -1.36651894],
[-0.70690189, 1.42906343, 1.46745499],
[ 0.15429838, 1.46723286, -0.43480148],
[-0.6351352 , 1.46723286, 1.81684904],
[ 1.08726535, 1.54357172, -1.01712489],
[-0.77866858, 1.54357172, 0.69102378],
[ 0.15429838, 1.61991057, -1.28887582],
[-0.20453507, 1.61991057, 1.35099031],
[-0.34806844, 1.61991057, -1.05594645],
[-0.49160182, 1.61991057, 0.72984534],
[-0.41983513, 2.00160487, -1.63826986],
[-0.06100169, 2.00160487, 1.58391968],
[ 0.58489852, 2.26879087, -1.32769738],
[-0.27630176, 2.26879087, 1.11806095],
[ 0.44136514, 2.49780745, -0.86183865],
[-0.49160182, 2.49780745, 0.92395314],
[-0.49160182, 2.91767117, -1.25005425],
[-0.6351352 , 2.91767117, 1.27334719]])
Finding the number of clusters using the elbow method
# Elbow method: fit K-Means on Data3 for k = 1..9 and record the
# within-cluster sum of squares (the fitted model's inertia_) for each k.
wcss = []
for i in range(1, 10):
    # n_init and random_state are pinned so the curve is reproducible;
    # the default n_init differs between scikit-learn releases.
    km = KMeans(n_clusters=i, n_init=10, random_state=42)
    km.fit(Data3)  # only the inertia is needed here, not the labels
    wcss.append(km.inertia_)
wcss
[599.9999999999999, 389.3861889564372, 295.2122461555488, 205.22514747675913, 168.24758017556834, 133.86842085478855, 117.16911517019713, 104.09784107506621, 92.43257435420892]
# Plot WCSS against the number of clusters to locate the elbow.
# The x values are derived from len(wcss) so they always line up with the
# inertia values collected above, whatever range of k was tried.
d = range(1, len(wcss) + 1)
px.line(
    x=d,
    y=wcss,
    labels={"x": "Number of clusters (k)", "y": "WCSS (inertia)"},
    title="Elbow method",
)
From the graph above, the WCSS curve flattens out after 6 clusters, so 6 is chosen as the number of clusters
Building the model
# Fit the final K-Means model with 6 clusters and get one label per row of Data3.
# n_init and random_state are fixed so the cluster label numbering is stable
# across re-runs; the cells below select clusters by their label id.
km = KMeans(n_clusters=6, n_init=10, random_state=42)
prediction3 = km.fit_predict(Data3)
prediction3
array([4, 4, 0, 4, 0, 4, 0, 4, 0, 4, 0, 4, 0, 4, 0, 4, 0, 4, 0, 4, 0, 4,
0, 4, 0, 4, 0, 4, 0, 4, 0, 4, 0, 4, 0, 4, 0, 4, 0, 4, 5, 4, 0, 2,
0, 4, 5, 2, 2, 2, 5, 2, 2, 5, 5, 5, 5, 5, 2, 5, 5, 2, 5, 5, 5, 2,
5, 5, 2, 2, 5, 5, 5, 5, 5, 2, 5, 2, 2, 5, 5, 2, 5, 5, 2, 5, 5, 2,
2, 5, 5, 2, 5, 2, 2, 2, 5, 2, 5, 2, 2, 5, 5, 2, 5, 2, 5, 5, 5, 5,
5, 2, 2, 2, 2, 2, 5, 5, 5, 5, 2, 2, 2, 1, 2, 1, 3, 1, 3, 1, 3, 1,
2, 1, 3, 1, 3, 1, 2, 1, 3, 1, 2, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1,
3, 1, 3, 1, 3, 1, 5, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1,
3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1,
3, 1])
# Attach the K-Means labels in prediction3 to the DataFrame as a new column,
# so each customer row carries its cluster id.
data['all_clusters'] = prediction3
# Display the DataFrame including the new column.
data
| Gender | Age | Annual Income (k$) | Spending Score (1-100) | age_cluster | salary_cluster | all_clusters | |
|---|---|---|---|---|---|---|---|
| 0 | Male | 19 | 15 | 39 | 2 | 4 | 4 |
| 1 | Male | 21 | 15 | 81 | 0 | 3 | 4 |
| 2 | Female | 20 | 16 | 6 | 2 | 4 | 0 |
| 3 | Female | 23 | 16 | 77 | 0 | 3 | 4 |
| 4 | Female | 31 | 17 | 40 | 2 | 4 | 0 |
| 5 | Female | 22 | 17 | 76 | 0 | 3 | 4 |
| 6 | Female | 35 | 18 | 6 | 1 | 4 | 0 |
| 7 | Female | 23 | 18 | 94 | 0 | 3 | 4 |
| 8 | Male | 64 | 19 | 3 | 1 | 4 | 0 |
| 9 | Female | 30 | 19 | 72 | 0 | 3 | 4 |
| 10 | Male | 67 | 19 | 14 | 1 | 4 | 0 |
| 11 | Female | 35 | 19 | 99 | 0 | 3 | 4 |
| 12 | Female | 58 | 20 | 15 | 1 | 4 | 0 |
| 13 | Female | 24 | 20 | 77 | 0 | 3 | 4 |
| 14 | Male | 37 | 20 | 13 | 1 | 4 | 0 |
| 15 | Male | 22 | 20 | 79 | 0 | 3 | 4 |
| 16 | Female | 35 | 21 | 35 | 2 | 4 | 0 |
| 17 | Male | 20 | 21 | 66 | 0 | 3 | 4 |
| 18 | Male | 52 | 23 | 29 | 1 | 4 | 0 |
| 19 | Female | 35 | 23 | 98 | 0 | 3 | 4 |
| 20 | Male | 35 | 24 | 35 | 2 | 4 | 0 |
| 21 | Male | 25 | 24 | 73 | 0 | 3 | 4 |
| 22 | Female | 46 | 25 | 5 | 1 | 4 | 0 |
| 23 | Male | 31 | 25 | 73 | 0 | 3 | 4 |
| 24 | Female | 54 | 28 | 14 | 1 | 4 | 0 |
| 25 | Male | 29 | 28 | 82 | 0 | 3 | 4 |
| 26 | Female | 45 | 28 | 32 | 1 | 4 | 0 |
| 27 | Male | 35 | 28 | 61 | 0 | 3 | 4 |
| 28 | Female | 40 | 29 | 31 | 1 | 4 | 0 |
| 29 | Female | 23 | 29 | 87 | 0 | 3 | 4 |
| 30 | Male | 60 | 30 | 4 | 1 | 4 | 0 |
| 31 | Female | 21 | 30 | 73 | 0 | 3 | 4 |
| 32 | Male | 53 | 33 | 4 | 1 | 4 | 0 |
| 33 | Male | 18 | 33 | 92 | 0 | 3 | 4 |
| 34 | Female | 49 | 33 | 14 | 1 | 4 | 0 |
| 35 | Female | 21 | 33 | 81 | 0 | 3 | 4 |
| 36 | Female | 42 | 34 | 17 | 1 | 4 | 0 |
| 37 | Female | 30 | 34 | 73 | 0 | 3 | 4 |
| 38 | Female | 36 | 37 | 26 | 2 | 4 | 0 |
| 39 | Female | 20 | 37 | 75 | 0 | 3 | 4 |
| 40 | Female | 65 | 38 | 35 | 3 | 4 | 5 |
| 41 | Male | 24 | 38 | 92 | 0 | 3 | 4 |
| 42 | Male | 48 | 39 | 36 | 3 | 4 | 0 |
| 43 | Female | 31 | 39 | 61 | 0 | 1 | 2 |
| 44 | Female | 49 | 39 | 28 | 1 | 4 | 0 |
| 45 | Female | 24 | 39 | 65 | 0 | 3 | 4 |
| 46 | Female | 50 | 40 | 55 | 3 | 1 | 5 |
| 47 | Female | 27 | 40 | 47 | 2 | 1 | 2 |
| 48 | Female | 29 | 40 | 42 | 2 | 1 | 2 |
| 49 | Female | 31 | 40 | 42 | 2 | 1 | 2 |
| 50 | Female | 49 | 42 | 52 | 3 | 1 | 5 |
| 51 | Male | 33 | 42 | 60 | 0 | 1 | 2 |
| 52 | Female | 31 | 43 | 54 | 2 | 1 | 2 |
| 53 | Male | 59 | 43 | 60 | 3 | 1 | 5 |
| 54 | Female | 50 | 43 | 45 | 3 | 1 | 5 |
| 55 | Male | 47 | 43 | 41 | 3 | 1 | 5 |
| 56 | Female | 51 | 44 | 50 | 3 | 1 | 5 |
| 57 | Male | 69 | 44 | 46 | 3 | 1 | 5 |
| 58 | Female | 27 | 46 | 51 | 2 | 1 | 2 |
| 59 | Male | 53 | 46 | 46 | 3 | 1 | 5 |
| 60 | Male | 70 | 46 | 56 | 3 | 1 | 5 |
| 61 | Male | 19 | 46 | 55 | 2 | 1 | 2 |
| 62 | Female | 67 | 47 | 52 | 3 | 1 | 5 |
| 63 | Female | 54 | 47 | 59 | 3 | 1 | 5 |
| 64 | Male | 63 | 48 | 51 | 3 | 1 | 5 |
| 65 | Male | 18 | 48 | 59 | 2 | 1 | 2 |
| 66 | Female | 43 | 48 | 50 | 3 | 1 | 5 |
| 67 | Female | 68 | 48 | 48 | 3 | 1 | 5 |
| 68 | Male | 19 | 48 | 59 | 2 | 1 | 2 |
| 69 | Female | 32 | 48 | 47 | 2 | 1 | 2 |
| 70 | Male | 70 | 49 | 55 | 3 | 1 | 5 |
| 71 | Female | 47 | 49 | 42 | 3 | 1 | 5 |
| 72 | Female | 60 | 50 | 49 | 3 | 1 | 5 |
| 73 | Female | 60 | 50 | 56 | 3 | 1 | 5 |
| 74 | Male | 59 | 54 | 47 | 3 | 1 | 5 |
| 75 | Male | 26 | 54 | 54 | 2 | 1 | 2 |
| 76 | Female | 45 | 54 | 53 | 3 | 1 | 5 |
| 77 | Male | 40 | 54 | 48 | 2 | 1 | 2 |
| 78 | Female | 23 | 54 | 52 | 2 | 1 | 2 |
| 79 | Female | 49 | 54 | 42 | 3 | 1 | 5 |
| 80 | Male | 57 | 54 | 51 | 3 | 1 | 5 |
| 81 | Male | 38 | 54 | 55 | 2 | 1 | 2 |
| 82 | Male | 67 | 54 | 41 | 3 | 1 | 5 |
| 83 | Female | 46 | 54 | 44 | 3 | 1 | 5 |
| 84 | Female | 21 | 54 | 57 | 2 | 1 | 2 |
| 85 | Male | 48 | 54 | 46 | 3 | 1 | 5 |
| 86 | Female | 55 | 57 | 58 | 3 | 1 | 5 |
| 87 | Female | 22 | 57 | 55 | 2 | 1 | 2 |
| 88 | Female | 34 | 58 | 60 | 0 | 1 | 2 |
| 89 | Female | 50 | 58 | 46 | 3 | 1 | 5 |
| 90 | Female | 68 | 59 | 55 | 3 | 1 | 5 |
| 91 | Male | 18 | 59 | 41 | 2 | 1 | 2 |
| 92 | Male | 48 | 60 | 49 | 3 | 1 | 5 |
| 93 | Female | 40 | 60 | 40 | 2 | 1 | 2 |
| 94 | Female | 32 | 60 | 42 | 2 | 1 | 2 |
| 95 | Male | 24 | 60 | 52 | 2 | 1 | 2 |
| 96 | Female | 47 | 60 | 47 | 3 | 1 | 5 |
| 97 | Female | 27 | 60 | 50 | 2 | 1 | 2 |
| 98 | Male | 48 | 61 | 42 | 3 | 1 | 5 |
| 99 | Male | 20 | 61 | 49 | 2 | 1 | 2 |
| 100 | Female | 23 | 62 | 41 | 2 | 1 | 2 |
| 101 | Female | 49 | 62 | 48 | 3 | 1 | 5 |
| 102 | Male | 67 | 62 | 59 | 3 | 1 | 5 |
| 103 | Male | 26 | 62 | 55 | 2 | 1 | 2 |
| 104 | Male | 49 | 62 | 56 | 3 | 1 | 5 |
| 105 | Female | 21 | 62 | 42 | 2 | 1 | 2 |
| 106 | Female | 66 | 63 | 50 | 3 | 1 | 5 |
| 107 | Male | 54 | 63 | 46 | 3 | 1 | 5 |
| 108 | Male | 68 | 63 | 43 | 3 | 1 | 5 |
| 109 | Male | 66 | 63 | 48 | 3 | 1 | 5 |
| 110 | Male | 65 | 63 | 52 | 3 | 1 | 5 |
| 111 | Female | 19 | 63 | 54 | 2 | 1 | 2 |
| 112 | Female | 38 | 64 | 42 | 2 | 1 | 2 |
| 113 | Male | 19 | 64 | 46 | 2 | 1 | 2 |
| 114 | Female | 18 | 65 | 48 | 2 | 1 | 2 |
| 115 | Female | 19 | 65 | 50 | 2 | 1 | 2 |
| 116 | Female | 63 | 65 | 43 | 3 | 1 | 5 |
| 117 | Female | 49 | 65 | 59 | 3 | 1 | 5 |
| 118 | Female | 51 | 67 | 43 | 3 | 1 | 5 |
| 119 | Female | 50 | 67 | 57 | 3 | 1 | 5 |
| 120 | Male | 27 | 67 | 56 | 2 | 1 | 2 |
| 121 | Female | 38 | 67 | 40 | 2 | 1 | 2 |
| 122 | Female | 40 | 69 | 58 | 0 | 1 | 2 |
| 123 | Male | 39 | 69 | 91 | 0 | 0 | 1 |
| 124 | Female | 23 | 70 | 29 | 2 | 2 | 2 |
| 125 | Female | 31 | 70 | 77 | 0 | 0 | 1 |
| 126 | Male | 43 | 71 | 35 | 1 | 1 | 3 |
| 127 | Male | 40 | 71 | 95 | 0 | 0 | 1 |
| 128 | Male | 59 | 71 | 11 | 1 | 2 | 3 |
| 129 | Male | 38 | 71 | 75 | 0 | 0 | 1 |
| 130 | Male | 47 | 71 | 9 | 1 | 2 | 3 |
| 131 | Male | 39 | 71 | 75 | 0 | 0 | 1 |
| 132 | Female | 25 | 72 | 34 | 2 | 1 | 2 |
| 133 | Female | 31 | 72 | 71 | 0 | 0 | 1 |
| 134 | Male | 20 | 73 | 5 | 2 | 2 | 3 |
| 135 | Female | 29 | 73 | 88 | 0 | 0 | 1 |
| 136 | Female | 44 | 73 | 7 | 1 | 2 | 3 |
| 137 | Male | 32 | 73 | 73 | 0 | 0 | 1 |
| 138 | Male | 19 | 74 | 10 | 2 | 2 | 2 |
| 139 | Female | 35 | 74 | 72 | 0 | 0 | 1 |
| 140 | Female | 57 | 75 | 5 | 1 | 2 | 3 |
| 141 | Male | 32 | 75 | 93 | 0 | 0 | 1 |
| 142 | Female | 28 | 76 | 40 | 2 | 1 | 2 |
| 143 | Female | 32 | 76 | 87 | 0 | 0 | 1 |
| 144 | Male | 25 | 77 | 12 | 2 | 2 | 3 |
| 145 | Male | 28 | 77 | 97 | 0 | 0 | 1 |
| 146 | Male | 48 | 77 | 36 | 3 | 2 | 3 |
| 147 | Female | 32 | 77 | 74 | 0 | 0 | 1 |
| 148 | Female | 34 | 78 | 22 | 2 | 2 | 3 |
| 149 | Male | 34 | 78 | 90 | 0 | 0 | 1 |
| 150 | Male | 43 | 78 | 17 | 1 | 2 | 3 |
| 151 | Male | 39 | 78 | 88 | 0 | 0 | 1 |
| 152 | Female | 44 | 78 | 20 | 1 | 2 | 3 |
| 153 | Female | 38 | 78 | 76 | 0 | 0 | 1 |
| 154 | Female | 47 | 78 | 16 | 1 | 2 | 3 |
| 155 | Female | 27 | 78 | 89 | 0 | 0 | 1 |
| 156 | Male | 37 | 78 | 1 | 1 | 2 | 3 |
| 157 | Female | 30 | 78 | 78 | 0 | 0 | 1 |
| 158 | Male | 34 | 78 | 1 | 1 | 2 | 3 |
| 159 | Female | 30 | 78 | 73 | 0 | 0 | 1 |
| 160 | Female | 56 | 79 | 35 | 3 | 2 | 5 |
| 161 | Female | 29 | 79 | 83 | 0 | 0 | 1 |
| 162 | Male | 19 | 81 | 5 | 2 | 2 | 3 |
| 163 | Female | 31 | 81 | 93 | 0 | 0 | 1 |
| 164 | Male | 50 | 85 | 26 | 1 | 2 | 3 |
| 165 | Female | 36 | 85 | 75 | 0 | 0 | 1 |
| 166 | Male | 42 | 86 | 20 | 1 | 2 | 3 |
| 167 | Female | 33 | 86 | 95 | 0 | 0 | 1 |
| 168 | Female | 36 | 87 | 27 | 2 | 2 | 3 |
| 169 | Male | 32 | 87 | 63 | 0 | 0 | 1 |
| 170 | Male | 40 | 87 | 13 | 1 | 2 | 3 |
| 171 | Male | 28 | 87 | 75 | 0 | 0 | 1 |
| 172 | Male | 36 | 87 | 10 | 1 | 2 | 3 |
| 173 | Male | 36 | 87 | 92 | 0 | 0 | 1 |
| 174 | Female | 52 | 88 | 13 | 1 | 2 | 3 |
| 175 | Female | 30 | 88 | 86 | 0 | 0 | 1 |
| 176 | Male | 58 | 88 | 15 | 1 | 2 | 3 |
| 177 | Male | 27 | 88 | 69 | 0 | 0 | 1 |
| 178 | Male | 59 | 93 | 14 | 1 | 2 | 3 |
| 179 | Male | 35 | 93 | 90 | 0 | 0 | 1 |
| 180 | Female | 37 | 97 | 32 | 2 | 2 | 3 |
| 181 | Female | 32 | 97 | 86 | 0 | 0 | 1 |
| 182 | Male | 46 | 98 | 15 | 1 | 2 | 3 |
| 183 | Female | 29 | 98 | 88 | 0 | 0 | 1 |
| 184 | Female | 41 | 99 | 39 | 2 | 2 | 3 |
| 185 | Male | 30 | 99 | 97 | 0 | 0 | 1 |
| 186 | Female | 54 | 101 | 24 | 1 | 2 | 3 |
| 187 | Male | 28 | 101 | 68 | 0 | 0 | 1 |
| 188 | Female | 41 | 103 | 17 | 1 | 2 | 3 |
| 189 | Female | 36 | 103 | 85 | 0 | 0 | 1 |
| 190 | Female | 34 | 103 | 23 | 2 | 2 | 3 |
| 191 | Female | 32 | 103 | 69 | 0 | 0 | 1 |
| 192 | Male | 33 | 113 | 8 | 1 | 2 | 3 |
| 193 | Female | 38 | 113 | 91 | 0 | 0 | 1 |
| 194 | Female | 47 | 120 | 16 | 1 | 2 | 3 |
| 195 | Female | 35 | 120 | 79 | 0 | 0 | 1 |
| 196 | Female | 45 | 126 | 28 | 1 | 2 | 3 |
| 197 | Male | 32 | 126 | 74 | 0 | 0 | 1 |
| 198 | Male | 32 | 137 | 18 | 2 | 2 | 3 |
| 199 | Male | 30 | 137 | 83 | 0 | 0 | 1 |
# Split the customers by cluster label. df1..df6 hold labels 0..5 respectively
# and are kept as separate names in case later cells use them.
cluster_frames = [data[data['all_clusters'] == label] for label in range(6)]
df1, df2, df3, df4, df5, df6 = cluster_frames

# One 3D scatter trace per cluster (Age x Annual Income x Spending Score).
# Each trace is named after its cluster label so the legend can be read
# against the 'all_clusters' column.
fig = go.Figure()
for label, frame in enumerate(cluster_frames):
    fig.add_trace(
        go.Scatter3d(
            x=frame['Age'],
            y=frame['Annual Income (k$)'],
            z=frame['Spending Score (1-100)'],
            mode="markers",
            name=f"Cluster {label}",
        )
    )